Example #1
    def process_query_result(self,
                             xml_response,
                             searchRequest,
                             maxResults=None):
        self.debug("Started processing results")
        countRejected = self.getRejectedCountDict()
        acceptedEntries = []
        entries, total, offset = self.parseXml(xml_response, maxResults)

        for entry in entries:
            accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
            if accepted:
                acceptedEntries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        if total == 0 or len(acceptedEntries) == 0:
            self.info("Query returned no results")
            return IndexerProcessingResult(entries=acceptedEntries,
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=countRejected)
        else:
            return IndexerProcessingResult(
                entries=acceptedEntries,
                queries=[],
                total=total,
                total_known=True,
                has_more=offset + len(entries) < total,
                rejected=countRejected)
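
Note: every example on this page returns an IndexerProcessingResult. That class belongs to nzbhydra and is not shown here, so the following is only a sketch: a plain namedtuple mirroring the keyword arguments used in these examples, plus the offset-based paging check from Example #1. The names IndexerProcessingResultSketch and build_page_result are illustrative, not part of the project.

    # Sketch only: the real IndexerProcessingResult lives in nzbhydra; this namedtuple
    # just mirrors the keyword arguments used throughout these examples.
    from collections import namedtuple

    IndexerProcessingResultSketch = namedtuple(
        "IndexerProcessingResultSketch",
        ["entries", "queries", "total", "total_known", "has_more", "rejected"])

    def build_page_result(accepted_entries, parsed_count, total, offset, rejected_counts):
        # More results exist when this page (offset + number of parsed entries)
        # does not yet reach the total reported by the indexer.
        return IndexerProcessingResultSketch(
            entries=accepted_entries,
            queries=[],
            total=total,
            total_known=True,
            has_more=offset + parsed_count < total,
            rejected=rejected_counts)
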
Example #2
    def process_query_result(self, xml, searchRequest, maxResults=None):
        self.debug("Started processing results")
        entries = []
        countRejected = self.getRejectedCountDict()
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            self.debug(xml[:500])
            raise IndexerResultParsingException(
                "Error while parsing XML from NZBClub", self)

        for item in tree.iter('item'):
            try:
                entry = self.parseItem(item)
            except IndexerResultParsingRowException:
                continue

            accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing results")
        return IndexerProcessingResult(
            entries=entries,
            queries=[],
            total=len(entries),
            total_known=True,
            has_more=False,
            rejected=countRejected
        )  # No paging with RSS. Might need/want to change to HTML and BS
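
Most of the XML-based examples share the same error-handling split: a response that cannot be parsed at all aborts the whole query with IndexerResultParsingException, while a single bad <item> raises IndexerResultParsingRowException and is simply skipped. A minimal sketch of that split with placeholder exception classes (the real exceptions are defined in nzbhydra and are not shown here):

    import xml.etree.ElementTree as ET

    class ParsingError(Exception):
        """Placeholder for IndexerResultParsingException: the whole response is unusable."""

    class RowParsingError(Exception):
        """Placeholder for IndexerResultParsingRowException: only one item is unusable."""

    def parse_items(xml_text, parse_item):
        try:
            tree = ET.fromstring(xml_text)
        except ET.ParseError:
            # A broken document means the whole query failed
            raise ParsingError("Error parsing XML")
        entries = []
        for item in tree.iter("item"):
            try:
                entries.append(parse_item(item))
            except RowParsingError:
                continue  # skip just this item and keep processing the rest
        return entries
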
Example #3
File: womble.py  Project: putneyj/nzbhydra
    def process_query_result(self, xml, searchRequest, maxResults=None):
        entries = []
        countRejected = self.getRejectedCountDict()
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            logger.debug(xml)
            raise IndexerResultParsingException("Error parsing XML", self)
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            entry.title = title.text
            entry.link = url.attrib["url"]
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO

            p = re.compile(r"(.*)\(Size:(\d*)")
            m = p.search(elem.find("description").text)
            if m:
                entry.description = m.group(1)
                entry.size = int(m.group(2)) * 1024 * 1024  #megabyte to byte
            category = elem.find("category").text.lower()
            if category in ("tv-dvdrip", "tv-sd"):
                entry.category = getCategoryByName("tvsd")
            elif category in ("tv-x264", "tv-hd"):
                entry.category = getCategoryByName("tvhd")
            else:
                entry.category = getUnknownCategory()

            entry.indexerguid = elem.find("guid").text[
                30:]  #39a/The.Almighty.Johnsons.S03E06.720p.BluRay.x264-YELLOWBiRD.nzb is the GUID, only the 39a doesn't work

            pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age = (arrow.utcnow() - pubdate).days

            accepted, reason, ri = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=True,
                                       has_more=False,
                                       total=len(entries),
                                       rejected=countRejected)
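
Example #3 extracts the description text and the size from womble's description field with one regex. A standalone sketch of that extraction; the sample description string is made up, and the size is given in megabytes as assumed by the example above:

    import re

    description = "Some.Show.S01E01.720p (Size:350 MB)"  # hypothetical womble-style description

    m = re.compile(r"(.*)\(Size:(\d*)").search(description)
    if m:
        clean_description = m.group(1).strip()      # "Some.Show.S01E01.720p"
        size_bytes = int(m.group(2)) * 1024 * 1024  # megabytes to bytes
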
Example #4
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")

        entries = []
        countRejected = self.getRejectedCountDict()
        logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)
        main_table = soup.find(id="results").find('table')

        if "No results found" in soup.text:
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=self.getRejectedCountDict())
        if not main_table or not main_table.find("tbody"):
            self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
            self.debug(html[:500])
            raise IndexerResultParsingException("Unable to find main table in NZBIndex page", self)

        items = main_table.find("tbody").find_all('tr')
        
        for row in items:
            try:
                entry = self.parseRow(row)
            except IndexerResultParsingRowException:
                continue
            accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        try:
            page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
            if len(page_links) == 0:
                total = len(entries)
                has_more = False
            else:
                pagecount = int(page_links[-2].text)
                currentpage = int(main_table.find("tfoot").find_all("tr")[1].find("b").text) #Don't count "next"
                has_more = pagecount > currentpage
                total = self.limit * pagecount #Good enough
        except Exception:
            self.exception("Error while trying to find page count")
            total = len(entries)
            has_more = False

        self.debug("Finished processing results")
        return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)
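
The paging block at the end of Examples #4 and #7 reads the page links from the table footer: the last link is "next", so the second-to-last link holds the page count, the bold element marks the current page, and the total is estimated as limit * pagecount. A standalone sketch against a made-up footer fragment (the markup below is illustrative, not real NZBIndex HTML):

    from bs4 import BeautifulSoup

    footer_html = """
    <table><tfoot>
      <tr><td>spacer row</td></tr>
      <tr><td><b>1</b> <a href="#">2</a> <a href="#">3</a> <a href="#">next</a></td></tr>
    </tfoot></table>
    """
    limit = 100  # assumed page size

    tfoot_rows = BeautifulSoup(footer_html, "html.parser").find("tfoot").find_all("tr")
    page_links = tfoot_rows[1].find_all("a")
    pagecount = int(page_links[-2].text)             # last link is "next", so take the one before it
    currentpage = int(tfoot_rows[1].find("b").text)  # the current page is rendered in bold
    has_more = pagecount > currentpage               # True here: 3 > 1
    total = limit * pagecount                        # rough estimate, good enough for paging
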
Example #5
    def process_query_result(self, xml, searchRequest, maxResults=None):
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            logger.debug(xml)
            raise IndexerResultParsingException("Error parsing XML", self)
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            entry.title = title.text
            entry.link = url.attrib["url"]
            entry.size = int(url.attrib["length"])
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            entry.category = getCategoryByName("anime")
            entry.indexerguid = elem.find("guid").text
            entry.details_link = entry.link.replace("dl", "info")
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate
            entry.age_days = (arrow.utcnow() - pubdate).days

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=True,
                                       has_more=False,
                                       total=len(entries),
                                       rejected=countRejected)
Example #6
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")
        logger.info("Last results count %d" % self.last_results_count)

        entries = Set([])
        countRejected = 0
        self.debug("Using HTML parser %s" %
                   config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)

        if "No results in most popular groups" in soup.text:
            logger.info("No results found for query")
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total_known=True,
                                           has_more=False,
                                           total=0,
                                           rejected=0)
        main_table = soup.find('table', attrs={'id': 'r2'})

        if not main_table:
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in binsearch page. This happens sometimes... :-)",
                self)

        items = main_table.find_all('tr')

        for row in items:
            try:
                entry = self.parseRow(row)
            except IndexerResultParsingRowException:
                continue
            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.add(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing %d results" % len(entries))

        page_links = soup.find_all('table', attrs={'class':
                                                   'xMenuT'})[1].find_all("a")
        has_more = len(page_links) > 0 and page_links[-1].text == ">"
        total_known = False
        total = 100
        if len(page_links) == 0:
            m = re.compile(r".* (\d+)\+? records.*").search(
                soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
            if m:
                total = int(m.group(1))
                total_known = True

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=total_known,
                                       has_more=has_more,
                                       total=total,
                                       rejected=countRejected)
Example #7
File: nzbindex.py  Project: nzbis/nzbhydra
    def process_query_result(self, html, maxResults=None):
        self.debug("Started processing results")

        entries = []
        logger.debug("Using HTML parser %s" %
                     config.settings.searching.htmlParser)
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)
        main_table = soup.find(id="results").find('table')

        if "No results found" in soup.text:
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False)
        if not main_table or not main_table.find("tbody"):
            self.error("Unable to find main table in NZBIndex page: %s..." %
                       html[:500])
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in NZBIndex page", self)

        items = main_table.find("tbody").find_all('tr')
        size_pattern = re.compile(
            r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
        age_pattern = re.compile(r"(?P<days1>\d+)\.(?P<days2>\d)")
        title_pattern = re.compile(
            r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
        for row in items:
            tds = list(row.find_all("td"))
            if len(tds) != 5:
                # advertisement
                continue
            entry = self.create_nzb_search_result()

            entry.indexerguid = row.find("input")["value"]

            infotd = tds[1]

            if "password protected" in infotd.text.lower():
                entry.passworded = True

            title = infotd.find("label").text
            title = title.replace("\n", "")
            title = re.sub(" +", "", title)

            m = title_pattern.search(title)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title

            info = infotd.find("div", class_="fileinfo")
            if info is not None and re.compile(r"\d NFO").search(
                    info.text):  # the "x NFO" text is absent when there is no NFO file
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            poster = infotd.find("span", class_="poster").find("a")
            if poster is not None:
                poster = poster.text.replace("\n", "")
                poster = re.sub(" +", "", poster)
                entry.poster = poster.replace("(", " (").replace("<",
                                                                 " <").strip()

            link = infotd.findAll('a', text=re.compile('Download'))
            if link is not None and len(link) == 1:
                entry.link = link[0]["href"]
            else:
                self.debug("Did not find link in row")

            entry.category = "N/A"

            sizetd = tds[2]

            m = size_pattern.search(sizetd.text)
            if not m:
                self.debug("Unable to find size information in %s" %
                           sizetd.text)
            else:
                size = float(m.group("size"))
                unit = m.group("unit")
                if unit == "KB":
                    size *= 1024
                elif unit == "MB":
                    size = size * 1024 * 1024
                elif unit == "GB":
                    size = size * 1024 * 1024 * 1024
                entry.size = int(size)

            grouptd = tds[3]
            group = grouptd.text.replace("\n",
                                         "").replace("a.b.",
                                                     "alt.binaries.").strip()
            entry.group = group

            agetd = tds[4]

            m = age_pattern.search(agetd.text)
            days = None
            hours = None
            if m:
                days = int(m.group("days1"))
                hours = int(m.group("days2")) * 2.4
            else:
                p = re.compile(r"(?P<hours>\d+) hours?")
                m = p.search(agetd.text)
                if m:
                    days = 0
                    hours = int(m.group("hours"))
            if hours is not None:
                pubdate = arrow.utcnow().replace(
                    days=-days,
                    hours=-1)  # hours because of timezone change below
                if hours > 0:
                    pubdate = pubdate.replace(hours=-hours)
                pubdate = pubdate.to(
                    "+01:00")  # nzbindex server time, I guess?
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True  # Precise to 2.4 hours, should be enough for duplicate detection
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            else:
                self.debug("Found no age info in %s" % str(agetd))

            collection_links = infotd.findAll("a",
                                              href=True,
                                              text="View collection")
            if collection_links is not None and len(collection_links) > 0:
                entry.details_link = collection_links[0].attrs["href"]
            accepted, reason = self.accept_result(entry)
            if accepted:
                entries.append(entry)
            else:
                self.debug("Rejected search result. Reason: %s" % reason)
        try:
            page_links = main_table.find("tfoot").find_all("tr")[1].find_all(
                'a')
            if len(page_links) == 0:
                total = len(entries)
                has_more = False
            else:
                pagecount = int(page_links[-2].text)
                currentpage = int(
                    main_table.find("tfoot").find_all("tr")[1].find(
                        "b").text)  #Don't count "next"
                has_more = pagecount > currentpage
                total = self.limit * pagecount  #Good enough
        except Exception:
            self.exception("Error while trying to find page count")
            total = len(entries)
            has_more = False

        self.debug("Finished processing results")
        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=total,
                                       total_known=True,
                                       has_more=has_more)
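
Examples #7 and #11 both turn a human-readable size such as "1.4 GB" into bytes with the same regex-plus-multiplier pattern. A standalone sketch of that conversion; the helper name parse_size_to_bytes is illustrative:

    import re

    SIZE_PATTERN = re.compile(r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")

    def parse_size_to_bytes(text):
        # Returns the size in bytes, or None when no size is found in the text
        m = SIZE_PATTERN.search(text)
        if not m:
            return None
        size = float(m.group("size"))
        multipliers = {"B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3}
        return int(size * multipliers[m.group("unit")])

    # parse_size_to_bytes("1.4 GB") -> 1503238553
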
Example #8
File: nzbclub.py  Project: w33ty/nzbhydra
    def process_query_result(self, xml, searchRequest, maxResults=None):
        self.debug("Started processing results")
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            self.debug(xml[:500])
            raise IndexerResultParsingException(
                "Error while parsing XML from NZBClub", self)

        group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
        poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue

            entry = self.create_nzb_search_result()
            if "password protect" in title.text.lower(
            ) or "passworded" in title.text.lower():
                entry.passworded = True

            p = re.compile(r'"(.*)"')
            m = p.search(title.text)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title.text

            entry.link = url.attrib["url"]
            entry.size = int(url.attrib["length"])
            entry.indexer = self.name
            entry.category = "N/A"
            entry.details_link = elem.find("link").text

            entry.indexerguid = elem.find("guid").text[
                -8:]  # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part

            description = elem.find("description").text
            description = urlparse.unquote(description).replace("+", " ")
            if re.compile(r"\d NFO Files").search(
                    description
            ):  # [x NFO Files] is missing if there is no NFO
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            m = group_pattern.search(description)
            if m:
                entry.group = m.group(1).strip()
            m = poster_pattern.search(description)
            if m:
                entry.poster = m.group(1).strip()

            try:

                pubdate = arrow.get(pubdate.text,
                                    'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            except Exception as e:
                self.error("Unable to parse pubdate %s" % pubdate.text)
                continue

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing results")
        return IndexerProcessingResult(
            entries=entries,
            queries=[],
            total=len(entries),
            total_known=True,
            has_more=False,
            rejected=countRejected
        )  # No paging with RSS. Might need/want to change to HTML and BS
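
Example #8 pulls the newsgroup, the poster and the NFO flag out of NZBClub's HTML-flavoured description text with regexes. A standalone sketch with a made-up description string (the real feed text may differ):

    import re

    description = ("Newsgroup: alt.binaries.teevee <br />"
                   "Poster: someone@example.com (someone) <br />"
                   "1 NFO Files")  # hypothetical NZBClub-style description, already URL-decoded

    group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
    poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")

    m = group_pattern.search(description)
    group = m.group(1).strip() if m else None   # "alt.binaries.teevee"
    m = poster_pattern.search(description)
    poster = m.group(1).strip() if m else None  # "someone@example.com (someone)"
    has_nfo = bool(re.search(r"\d NFO Files", description))  # the phrase is absent when there is no NFO
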
Example #9
    def process_query_result(self,
                             xml_response,
                             searchRequest,
                             maxResults=None):
        self.debug("Started processing results")

        if "0 results found" in xml_response:
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)
        if "search to short" in xml_response:
            self.info("omgwtf says the query was too short")
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)

        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)

        if tree.tag == "xml":
            total = int(tree.find("info").find("results").text)
            current_page = int(tree.find("info").find("current_page").text)
            total_pages = int(tree.find("info").find("pages").text)
            has_more = current_page < total_pages
            for item in tree.find("search_req").findall("post"):
                entry = self.create_nzb_search_result()
                entry.indexerguid = item.find("nzbid").text
                entry.title = item.find("release").text
                entry.group = item.find("group").text
                entry.link = item.find("getnzb").text
                entry.size = long(item.find("sizebytes").text)
                entry.epoch = long(item.find("usenetage").text)
                pubdate = arrow.get(entry.epoch)
                entry.pubdate_utc = str(pubdate)
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True
                entry.details_link = item.find("details").text
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES if item.find(
                    "getnfo") is not None else NzbSearchResult.HAS_NFO_NO
                categoryid = item.find("categoryid").text
                if categoryid in omgwtf_to_categories.keys():
                    entry.category = omgwtf_to_categories[categoryid]
                else:
                    entry.category = "N/A"
                entries.append(entry)
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=total,
                                           total_known=True,
                                           has_more=has_more,
                                           rejected=countRejected)
        elif tree.tag == "rss":
            regexGuid = re.compile(r".*\?id=(\w+)&.*")
            regexGroup = re.compile(r".*Group:<\/b> ([\w\.\-]+)<br \/>.*")
            for item in tree.find("channel").findall("item"):
                entry = self.create_nzb_search_result()
                indexerguid = item.find("guid").text
                m = regexGuid.match(indexerguid)
                if m:
                    entry.indexerguid = m.group(1)
                else:
                    self.warn("Unable to find GUID in " + indexerguid)
                    continue
                entry.title = item.find("title").text
                description = item.find("description").text
                m = regexGroup.match(description)
                if m:
                    entry.group = m.group(1)
                else:
                    self.warn("Unable to find group in " + description)
                    continue
                entry.size = long(item.find("enclosure").attrib["length"])
                entry.pubDate = item.find("pubDate").text
                pubdate = arrow.get(entry.pubDate,
                                    'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.precise_date = True
                entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
                categoryid = item.find("categoryid").text
                if categoryid in omgwtf_to_categories.keys():
                    entry.category = omgwtf_to_categories[categoryid]
                else:
                    entry.category = "N/A"
                accepted, reason = self.accept_result(entry, searchRequest,
                                                      self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=len(entries),
                                           total_known=True,
                                           has_more=False,
                                           rejected=countRejected)
        else:
            self.warn("Unknown response type: %s" % xml_response[:100])
            return IndexerProcessingResult(entries=[],
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=countRejected)
Example #10
    def process_query_result(self,
                             xml_response,
                             searchRequest,
                             maxResults=None):
        self.debug("Started processing results")

        entries = []
        countRejected = 0
        grouppattern = re.compile(r"Group:</b> ?([\w\.]+)<br ?/>")
        guidpattern = re.compile(r"(.*/)?([a-zA-Z0-9@\.]+)")

        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        for item in tree.find("channel").findall("item"):
            usenetdate = None
            entry = self.create_nzb_search_result()
            # These are the values that absolutely must be contained in the response
            entry.title = item.find("title").text
            entry.link = item.find("link").text
            entry.attributes = []
            entry.pubDate = item.find("pubDate").text
            entry.indexerguid = item.find("guid").text
            entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
            m = guidpattern.search(entry.indexerguid)
            if m:
                entry.indexerguid = m.group(2)

            description = item.find("description")
            if description is not None:
                description = description.text
                if description is not None and "Group:" in description:  # DogNZB has the group in its description
                    m = grouppattern.search(description)
                    if m and m.group(1) != "not available":
                        entry.group = m.group(1)

            categories = []
            for i in item.findall("./newznab:attr", {
                    "newznab":
                    "http://www.newznab.com/DTD/2010/feeds/attributes/"
            }):
                attribute_name = i.attrib["name"]
                attribute_value = i.attrib["value"]
                if attribute_name == "size":
                    entry.size = int(attribute_value)
                elif attribute_name == "guid":
                    entry.indexerguid = attribute_value
                elif attribute_name == "category" and attribute_value != "":
                    try:
                        categories.append(int(attribute_value))
                    except ValueError:
                        self.error("Unable to parse category %s" %
                                   attribute_value)
                elif attribute_name == "poster":
                    entry.poster = attribute_value
                elif attribute_name == "info":
                    entry.details_link = attribute_value
                elif attribute_name == "password" and attribute_value != "0":
                    entry.passworded = True
                elif attribute_name == "group" and attribute_value != "not available":
                    entry.group = attribute_value
                elif attribute_name == "usenetdate":
                    usenetdate = arrow.get(attribute_value,
                                           'ddd, DD MMM YYYY HH:mm:ss Z')
                # Store all the extra attributes, we will return them later for external apis
                entry.attributes.append({
                    "name": attribute_name,
                    "value": attribute_value
                })
            if entry.details_link is None:
                entry.details_link = self.get_details_link(entry.indexerguid)

            if usenetdate is None:
                # Not provided by attributes, use pubDate instead
                usenetdate = arrow.get(entry.pubDate,
                                       'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = usenetdate.timestamp
            entry.pubdate_utc = str(usenetdate)
            entry.age_days = (arrow.utcnow() - usenetdate).days
            entry.precise_date = True

            # Map category. Try to find the most specific category (like 2040), then the more general one (like 2000)
            categories = sorted(
                categories, reverse=True
            )  # Sort to make the most specific category appear first
            if len(categories) > 0:
                for k, v in categories_to_newznab.items():
                    for c in categories:
                        if c in v:
                            entry.category = k
                            break

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
            if maxResults is not None and len(entries) == maxResults:
                break

        response_total_offset = tree.find(
            "./channel[1]/newznab:response",
            {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"})
        if response_total_offset is None or response_total_offset.attrib[
                "total"] == "" or response_total_offset.attrib["offset"] == "":
            self.warn(
                "Indexer returned a result page without total results and offset. Shame! *rings bell*"
            )
            offset = 0
            total = len(entries)
        else:
            total = int(response_total_offset.attrib["total"])
            offset = int(response_total_offset.attrib["offset"])
        if total == 0 or len(entries) == 0:
            self.info("Query returned no results")
            return IndexerProcessingResult(entries=entries,
                                           queries=[],
                                           total=0,
                                           total_known=True,
                                           has_more=False,
                                           rejected=0)

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total=total,
                                       total_known=True,
                                       has_more=offset + len(entries) < total,
                                       rejected=countRejected)
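
Example #10 reads the Newznab extension attributes (size, category, usenetdate and so on) from namespaced <newznab:attr> elements. A standalone sketch of that namespace handling with a minimal made-up item; only the namespace URL is taken from the example above, and collecting the attributes into a dict is a simplification (Example #10 walks them in a loop and keeps every occurrence):

    import xml.etree.ElementTree as ET

    NEWZNAB_NS = {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"}

    item_xml = """
    <item xmlns:newznab="http://www.newznab.com/DTD/2010/feeds/attributes/">
      <title>Some.Release.720p</title>
      <newznab:attr name="size" value="1073741824" />
      <newznab:attr name="category" value="2040" />
    </item>
    """  # made-up item with two newznab attributes

    item = ET.fromstring(item_xml)
    attributes = {}
    for attr in item.findall("./newznab:attr", NEWZNAB_NS):
        attributes[attr.attrib["name"]] = attr.attrib["value"]

    size = int(attributes.get("size", 0))  # 1073741824
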
Example #11
    def process_query_result(self, html, searchRequest, maxResults=None):
        self.debug("Started processing results")
        logger.info("Last results count %d" % self.last_results_count)
        entries = Set([])
        countRejected = 0
        soup = BeautifulSoup(html, config.settings.searching.htmlParser)
        self.debug("Using HTML parser %s" %
                   config.settings.searching.htmlParser)

        main_table = soup.find('table', attrs={'id': 'r2'})

        if not main_table:
            self.warn(
                "Unable to find main table in binsearch page. This just sometimes happens..."
            )
            self.debug(html[:500])
            raise IndexerResultParsingException(
                "Unable to find main table in binsearch page. This happens sometimes... :-)",
                self)

        items = main_table.find_all('tr')
        title_pattern = re.compile(
            r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
        size_pattern = re.compile(
            r"size: (?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
        poster_pattern = re.compile(r"&p=(.*)&")
        group_pattern = re.compile(r"&g=([\w\.]*)&")
        nfo_pattern = re.compile(r"\d nfo file")
        for row in items:
            entry = self.create_nzb_search_result()
            title = row.find('span', attrs={'class': 's'})

            if title is None:
                self.debug("Ignored entry because it has no title")
                continue
            title = title.text

            if "password protect" in title.lower(
            ) or "passworded" in title.lower():
                entry.passworded = True

            m = title_pattern.search(title)
            if m:
                entry.title = m.group(1)
            else:
                entry.title = title

            entry.indexerguid = row.find("input", attrs={"type":
                                                         "checkbox"})["name"]
            entry.link = "https://www.binsearch.info/fcgi/nzb.fcgi?q=%s" % entry.indexerguid
            info = row.find("span", attrs={"class": "d"})
            if info is None:
                self.debug("Ignored entry because it has no info")
                continue

            # e.g. '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
            collection_link = info.find("a")["href"]
            entry.details_link = "%s%s" % (self.host, collection_link)
            m = group_pattern.search(collection_link)
            if m:
                entry.group = m.group(1).strip()

            m = poster_pattern.search(collection_link)
            if m:
                poster = m.group(1).strip()
                entry.poster = urlparse.unquote(poster).replace("+", " ")

            # Size
            m = size_pattern.search(info.text)
            if not m:
                self.debug("Unable to find size information in %s" % info.text)
            else:
                size = float(m.group("size"))
                unit = m.group("unit")
                if unit == "B":
                    pass
                elif unit == "KB":
                    size *= 1024
                elif unit == "MB":
                    size = size * 1024 * 1024
                elif unit == "GB":
                    size = size * 1024 * 1024 * 1024

                entry.size = int(size)

            entry.category = "N/A"

            if nfo_pattern.search(info.text):  # the "x nfo file" text is absent when there is no NFO
                entry.has_nfo = NzbSearchResult.HAS_NFO_YES
            else:
                entry.has_nfo = NzbSearchResult.HAS_NFO_NO

            # Age
            try:
                pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(
                    row.text).group(1)
                pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = False
                entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            except Exception as e:
                self.error("Unable to find age in %s" %
                           row.find_all("td")[-1:][0].text)
                continue

            accepted, reason = self.accept_result(entry, searchRequest,
                                                  self.supportedFilters)
            if accepted:
                entries.add(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)

        self.debug("Finished processing %d results" % len(entries))

        page_links = soup.find_all('table', attrs={'class':
                                                   'xMenuT'})[1].find_all("a")
        has_more = len(page_links) > 0 and page_links[-1].text == ">"
        total_known = False
        total = 100
        if len(page_links) == 0:
            m = re.compile(r".* (\d+)\+? records.*").search(
                soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
            if m:
                total = int(m.group(1))
                total_known = True

        return IndexerProcessingResult(entries=entries,
                                       queries=[],
                                       total_known=total_known,
                                       has_more=has_more,
                                       total=total,
                                       rejected=countRejected)
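
Examples #6 and #11 estimate paging from the binsearch menu table: a trailing ">" link means another page exists, and when there are no page links the "N+ records" text gives the total. A standalone sketch of that record-count regex (the sample text is made up):

    import re

    menu_text = "results 1-100 from about 2437 records for your query"  # made-up footer text

    total_known = False
    total = 100  # fallback used by the examples above
    m = re.compile(r".* (\d+)\+? records.*").search(menu_text)
    if m:
        total = int(m.group(1))  # 2437
        total_known = True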