Example #1
    def parseItem(self, elem):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            raise IndexerResultParsingRowException(
                "Unable to find title, url or date")
        entry = self.create_nzb_search_result()
        if "password protect" in title.text.lower(
        ) or "passworded" in title.text.lower():
            entry.passworded = True
        p = re.compile(r'"(.*)"')
        m = p.search(title.text)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title.text
        entry.link = url.attrib["url"]
        entry.size = int(url.attrib["length"])
        entry.indexer = self.name
        entry.category = getUnknownCategory()
        entry.details_link = elem.find("link").text
        entry.indexerguid = elem.find("guid").text[
            -8:]  # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
        description = elem.find("description").text
        description = urlparse.unquote(description).replace("+", " ")
        if re.compile(r"\d NFO Files").search(
                description):  # [x NFO Files] is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        m = self.group_pattern.search(description)
        if m:
            entry.group = m.group(1).strip()
        m = self.poster_pattern.search(description)
        if m:
            entry.poster = m.group(1).strip()
        try:
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception:
            self.error("Unable to parse pubdate %s" % pubdate.text)
            raise IndexerResultParsingRowException("Unable to parse date")
        return entry
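
A quick aside on the date handling: the snippet leans on arrow's token-based parsing. Below is a minimal, standalone sketch of that step, assuming a recent arrow release (where the old `timestamp` property became `int_timestamp`); `parse_pubdate` and its return shape are illustrative, not part of the original class.

    import arrow

    def parse_pubdate(pubdate_text):
        # "ddd, DD MMM YYYY HH:mm:ss Z" matches RSS dates such as
        # "Mon, 02 Jan 2017 15:04:05 +0000"
        pubdate = arrow.get(pubdate_text, "ddd, DD MMM YYYY HH:mm:ss Z")
        return {
            "epoch": pubdate.int_timestamp,          # seconds since the Unix epoch
            "pubdate_utc": str(pubdate.to("utc")),   # normalized UTC representation
            "age_days": (arrow.utcnow() - pubdate).days,
        }

    print(parse_pubdate("Mon, 02 Jan 2017 15:04:05 +0000"))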
Example #2
    def parse_date(self, agetd, entry):
        m = self.age_pattern.search(agetd.text)
        days = None
        hours = None
        if m:
            days = int(m.group("days1"))
            hours = int(m.group("days2")) * 2.4
        else:
            p = re.compile(r"(?P<hours>\d+) hours?")
            m = p.search(agetd.text)
            if m:
                days = 0
                hours = int(m.group("hours"))
        if hours is not None:
            # old arrow shifted with plural keywords on replace(); newer releases use shift()
            pubdate = arrow.utcnow().replace(days=-days, hours=-1)  # the extra hour offsets the timezone change below
            if hours > 0:
                pubdate = pubdate.replace(hours=-hours)
            pubdate = pubdate.to("+01:00")  # nzbindex server time, I guess?
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age = str(entry.age_days) + "d"
            entry.age_precise = True  # Precise to 2.4 hours, should be enough for duplicate detection
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        else:
            self.error("Found no age info in %s" % str(agetd))
            raise IndexerResultParsingRowException("Unable to parse age")
Example #3
 def parseRow(self, row):
     tds = list(row.find_all("td"))
     if len(tds) != 5:
         # advertisement
         raise IndexerResultParsingRowException("Ad")
     entry = self.create_nzb_search_result()
     entry.indexerguid = row.find("input")["value"]
     infotd = tds[1]
     if "password protected" in infotd.text.lower():
         entry.passworded = True
     title = infotd.find("label").text
     title = title.replace("\n", "")
     title = re.sub(" +", " ", title)
     m = self.title_pattern.search(title)
     if m:
         entry.title = m.group(1)
     else:
         entry.title = title
     entry.title = self.cleanUpTitle(entry.title)
     info = infotd.find("div", class_="fileinfo")
     if info is not None and re.compile(r"\d NFO").search(
             info.text):  # 1 nfo file is missing if there is no NFO
         entry.has_nfo = NzbSearchResult.HAS_NFO_YES
     else:
         entry.has_nfo = NzbSearchResult.HAS_NFO_NO
     entry.poster = self.parse_poster(infotd)
     link = infotd.findAll('a', text=re.compile('Download'))
     if len(link) == 1:  # findAll returns a list, never None
         entry.link = link[0]["href"]
     else:
         self.debug("Did not find link in row")
     complete = infotd.find("span", class_="complete")
     if complete:
         entry.files = complete.text[0:complete.text.find(" ")]
     entry.category = getUnknownCategory()
     sizetd = tds[2]
     entry.size = self.parse_size(sizetd)
     grouptd = tds[3]
     group = grouptd.text.replace("\n",
                                  "").replace("a.b.",
                                              "alt.binaries.").strip()
     entry.group = group
     agetd = tds[4]
     self.parse_date(agetd, entry)
     collection_links = infotd.findAll("a",
                                       href=True,
                                       text="View collection")
     if collection_links is not None and len(collection_links) > 0:
         entry.details_link = collection_links[0].attrs["href"]
     return entry
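
The BeautifulSoup calls above are easiest to see against concrete markup. A self-contained illustration, run on a hypothetical five-cell row (the real HTML is whatever the indexer serves):

    from bs4 import BeautifulSoup

    html = """
    <tr>
      <td><input value="abc123"/></td>
      <td><label>Some.Release.Name</label>
          <a href="/nzb/abc123">Download</a></td>
      <td>700 MB</td>
      <td>a.b.movies</td>
      <td>3.5 days</td>
    </tr>
    """
    row = BeautifulSoup(html, "html.parser").find("tr")
    tds = list(row.find_all("td"))
    assert len(tds) == 5  # anything else is treated as an ad above

    guid = row.find("input")["value"]
    title = tds[1].find("label").text
    group = tds[3].text.replace("a.b.", "alt.binaries.").strip()
    print("%s | %s | %s" % (guid, title, group))  # abc123 | Some.Release.Name | alt.binaries.movies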
Example #4
    def parseRow(self, row):
        entry = self.create_nzb_search_result()
        title = row.find('span', attrs={'class': 's'})

        if title is None:
            self.debug("Ignored entry because it has no title")
            raise IndexerResultParsingRowException("No title found")
        title = title.text

        if "password protect" in title.lower() or "passworded" in title.lower(
        ):
            entry.passworded = True

        m = self.title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title

        entry.indexerguid = row.find("input", attrs={"type":
                                                     "checkbox"})["name"]
        entry.link = self.get_nzb_link(entry.indexerguid, None)
        info = row.find("span", attrs={"class": "d"})
        if info is None:
            self.debug("Ignored entry because it has no info")
            raise IndexerResultParsingRowException("No info found")

        # e.g. '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
        collection_link = info.find("a")["href"]
        entry.details_link = "%s%s" % (self.host, collection_link)
        m = self.goup_pattern.search(collection_link)
        if m:
            entry.group = m.group(1).strip()

        m = self.poster_pattern.search(collection_link)
        if m:
            poster = m.group(1).strip()
            entry.poster = urlparse.unquote(poster).replace("+", " ")

        # Size
        m = self.size_pattern.search(info.text)
        if not m:
            self.debug("Unable to find size information in %s" % info.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "GB":
                size = size * 1024 * 1024 * 1024
            elif unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024

            entry.size = int(size)

        entry.category = getUnknownCategory()

        if self.nfo_pattern.search(info.text):  # "1 nfo file" is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO

        # Age
        try:
            pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
            pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = False
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception:
            self.error("Unable to find age in %s" % row.find_all("td")[-1].text)
            raise IndexerResultParsingRowException("Unable to parse age")
        return entry
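
The unit handling above is a common pattern that can be made table-driven. A minimal sketch; SIZE_PATTERN here is an assumption, since the real self.size_pattern is defined elsewhere on the class:

    import re

    SIZE_PATTERN = re.compile(r"(?P<size>[\d.]+)\s*(?P<unit>KB|MB|GB)")  # assumed pattern
    MULTIPLIERS = {"KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3}

    def parse_size(text):
        """Return the size in bytes, or None if no size info is found."""
        m = SIZE_PATTERN.search(text)
        if not m:
            return None
        return int(float(m.group("size")) * MULTIPLIERS[m.group("unit")])

    print(parse_size("4.7 GB collection"))  # 5046586572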
Example #5
    def process_query_result(self, xml_response, searchRequest, maxResults=None):
        self.debug("Started processing results")

        if "0 results found" in xml_response:
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
        if "search to short" in xml_response:
            self.info("omgwtf says the query was too short")
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
            
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        
        if tree.tag == "xml":
            total = int(tree.find("info").find("results").text)
            current_page = int(tree.find("info").find("current_page").text)
            total_pages = int(tree.find("info").find("pages").text)
            has_more = current_page < total_pages
            for item in tree.find("search_req").findall("post"):
                entry = self.parseItem(item)
                accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)      
        elif tree.tag == "rss":
            for item in tree.find("channel").findall("item"):
                entry = self.create_nzb_search_result()
                indexerguid = item.find("guid").text
                m = self.regexGuid.match(indexerguid)
                if m:
                    entry.indexerguid = m.group(1)
                else:
                    self.warn("Unable to find GUID in " + indexerguid)
                    raise IndexerResultParsingRowException("Unable to find GUID")
                entry.title = item.find("title").text
                description = item.find("description").text
                m = self.regexGroup.match(description)
                if m:
                    entry.group = m.group(1)
                else:
                    self.warn("Unable to find group in " + description)
                    raise IndexerResultParsingRowException("Unable to find usenet group")
                entry.size = long(item.find("enclosure").attrib["length"])
                entry.pubDate = item.find("pubDate").text
                pubdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True  # matches the attribute name used by the other parsers
                entry.link = item.find("link").text
                entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
                categoryid = item.find("categoryid").text
                entry.details_link = self.get_details_link(entry.indexerguid)
                if categoryid in omgwtf_to_categories:
                    entry.category = getCategoryByName(omgwtf_to_categories[categoryid])
                else:
                    entry.category = getUnknownCategory()
                accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
        else:
            self.warn("Unknown response type: %s" % xml_response[:100])
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
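
The branching above keys on the XML root tag to tell the two response formats apart. A toy demonstration of that dispatch with ElementTree (the response bodies here are made up):

    import xml.etree.ElementTree as ET

    def response_kind(xml_response):
        tree = ET.fromstring(xml_response)
        if tree.tag == "xml":
            return "api"  # structured result with paging info
        if tree.tag == "rss":
            return "rss"  # plain RSS feed of items
        return "unknown"

    print(response_kind("<rss><channel/></rss>"))  # rss
    print(response_kind("<xml><info/></xml>"))     # api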