Example No. 1
 def __init__(self, title=None, link=None, indexer=None, guid=None, size=None, category=None, attributes=None, epoch=None, pubDate=None, pubdate_utc=None, age_days=None, poster=None, has_nfo=HAS_NFO_YES, indexerguid=None, details_link=None, group=None, indexerscore=0, dbsearchid=None, passworded=False):
 
     self.title = title
     self.link = link
     self.epoch = epoch
     self.pubdate_utc = pubdate_utc
     self.age_days = age_days
     self.age_precise = True  # Set to False if the age comes from an approximate age value rather than a pub date; this may affect the duplicate check
     self.indexer = indexer
     self.guid = guid
     self.indexerguid = indexerguid  # the result's GUID at the indexer, needed later to download the actual NZB
     self.size = size
     self.category = category if category is not None else categories.getUnknownCategory()
     self.description = None
     self.comments = None
     self.attributes = attributes if attributes is not None else []
     self.search_types = [] #"general", "tv", "movie"
     self.supports_queries = True #Indexers might only provide a feed of the latest releases, e.g. womble
     self.search_ids = [] #"tvdbid", "rid", "imdbid"
     self.poster = poster
     self.has_nfo = has_nfo 
     self.details_link = details_link
     self.group = group
     self.indexerscore = indexerscore
     self.dbsearchid = dbsearchid
     self.passworded = passworded
     self.pubDate = pubDate
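A minimal usage sketch (invented values, not from the source) showing the constructor above called with keyword arguments; every argument is optional, so results can be built incrementally:

result = NzbSearchResult(title="Some.Show.S01E01.720p.HDTV-GRP",
                         link="https://indexer.example/getnzb/abc123",
                         indexer="someindexer", guid="abc123",
                         size=700 * 1024 * 1024)
print(result.age_precise)  # True by default; parsers flip it for imprecise dates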
Example No. 2
    def process_query_result(self, xml, searchRequest, maxResults=None):
        entries = []
        countRejected = self.getRejectedCountDict()
        try:
            tree = ET.fromstring(xml)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml[:500])
            logger.debug(xml)
            raise IndexerResultParsingException("Error parsing XML", self)
        for elem in tree.iter('item'):
            title = elem.find("title")
            url = elem.find("enclosure")
            pubdate = elem.find("pubDate")
            if title is None or url is None or pubdate is None:
                continue
            
            entry = self.create_nzb_search_result()
            entry.title = title.text
            entry.link = url.attrib["url"]
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
            
            p = re.compile(r"(.*)\(Size:(\d*)")
            m = p.search(elem.find("description").text)
            if m:
                entry.description = m.group(1)
                entry.size = int(m.group(2)) * 1024 * 1024  # megabytes to bytes
            category = elem.find("category").text.lower()
            if category in ("tv-dvdrip", "tv-sd"):
                entry.category = getCategoryByName("tvsd")
            elif category in ("tv-x264", "tv-hd"):
                entry.category = getCategoryByName("tvhd")
            else:
                entry.category = getUnknownCategory()

            entry.indexerguid = elem.find("guid").text[30:]  # the GUID looks like "39a/The.Almighty.Johnsons.S03E06.720p.BluRay.x264-YELLOWBiRD.nzb"; the short "39a" part alone does not work
            
            pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age = (arrow.utcnow() - pubdate).days

            accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected[ri] += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        
        return IndexerProcessingResult(entries=entries, queries=[], total_known=True, has_more=False, total=len(entries), rejected=countRejected)
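A quick sketch (with an invented description string) of what the size regex above captures: group 1 is the free-text description, group 2 the size in megabytes:

import re

p = re.compile(r"(.*)\(Size:(\d*)")
m = p.search("Some.Release.720p uploaded yesterday (Size:1400 MB)")
print(m.group(1))                     # 'Some.Release.720p uploaded yesterday '
print(int(m.group(2)) * 1024 * 1024)  # 1468006400 bytes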
Example No. 3
 def __init__(self,
              title=None,
              link=None,
              indexer=None,
              guid=None,
              size=None,
              category=None,
              attributes=None,
              epoch=None,
              pubDate=None,
              pubdate_utc=None,
              age_days=None,
              poster=None,
              has_nfo=HAS_NFO_YES,
              indexerguid=None,
              details_link=None,
              group=None,
              indexerscore=0,
              dbsearchid=None,
              passworded=False,
              downloadType="nzb"):
     self.title = title
     self.link = link
     self.epoch = epoch
     self.pubdate_utc = pubdate_utc
     self.age_days = age_days
     self.age_precise = True  # Set to False if the age comes from an approximate age value rather than a pub date; this may affect the duplicate check
     self.indexer = indexer
     self.guid = guid
     self.indexerguid = indexerguid  # the result's GUID at the indexer, needed later to download the actual NZB
     self.size = size
     self.category = category if category is not None else categories.getUnknownCategory()
     self.description = None
     self.comments = None
     self.attributes = attributes if attributes is not None else []
     self.search_types = []  #"general", "tv", "movie"
     self.supports_queries = True  #Indexers might only provide a feed of the latest releases, e.g. womble
     self.search_ids = []  #"tvdbid", "rid", "imdbid"
     self.poster = poster
     self.has_nfo = has_nfo
     self.details_link = details_link
     self.group = group
     self.indexerscore = indexerscore
     self.dbsearchid = dbsearchid
     self.passworded = passworded
     self.pubDate = pubDate
     self.downloadType = downloadType
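The attributes=None default with the "attributes if attributes is not None else []" fallback avoids Python's shared-mutable-default pitfall. A small illustration of the behaviour it guards against (general Python, not source code):

def bad(attributes=[]):  # one list object, created once when the def runs
    attributes.append(1)
    return attributes

bad()
print(bad())  # [1, 1] - both calls mutated the same shared default list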
Example No. 4
 def parseRow(self, row):
     tds = list(row.find_all("td"))
     if len(tds) != 5:
         # advertisement
         raise IndexerResultParsingRowException("Ad")
     entry = self.create_nzb_search_result()
     entry.indexerguid = row.find("input")["value"]
     infotd = tds[1]
     if "password protected" in infotd.text.lower():
         entry.passworded = True
     title = infotd.find("label").text
     title = title.replace("\n", "")
     title = re.sub(" +", " ", title)
     m = self.title_pattern.search(title)
     if m:
         entry.title = m.group(1)
     else:
         entry.title = title
     entry.title = self.cleanUpTitle(entry.title)
     info = infotd.find("div", class_="fileinfo")
     if info is not None and re.compile(r"\d NFO").search(info.text):  # the "<n> NFO" text is absent when there is no NFO file
         entry.has_nfo = NzbSearchResult.HAS_NFO_YES
     else:
         entry.has_nfo = NzbSearchResult.HAS_NFO_NO
     entry.poster = self.parse_poster(infotd)
     link = infotd.findAll('a', text=re.compile('Download'))
     if link is not None and len(link) == 1:
         entry.link = link[0]["href"]
     else:
         self.debug("Did not find link in row")
     complete = infotd.find("span", class_="complete")
     if complete:
         entry.files = complete.text[0:complete.text.find(" ")]
     entry.category = getUnknownCategory()
     sizetd = tds[2]
     entry.size = self.parse_size(sizetd)
     grouptd = tds[3]
     group = grouptd.text.replace("\n", "").replace("a.b.", "alt.binaries.").strip()
     entry.group = group
     agetd = tds[4]
     self.parse_date(agetd, entry)
     collection_links = infotd.findAll("a", href=True, text="View collection")
     if collection_links is not None and len(collection_links) > 0:
         entry.details_link = collection_links[0].attrs["href"]
     return entry
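For orientation, a sketch of the row shape parseRow expects, built from invented HTML: five td cells, an input whose value attribute carries the GUID, and a Download link:

from bs4 import BeautifulSoup

html = ('<tr><td><input value="guid123"/></td>'
        '<td><label>Some.Release.1080p</label>'
        '<div class="fileinfo">12 files, 1 NFO</div>'
        '<a href="/nzb/guid123">Download</a></td>'
        '<td>700 MB</td><td>a.b.teevee</td><td>2 days</td></tr>')
row = BeautifulSoup(html, "html.parser").tr
print(len(row.find_all("td")))     # 5 - any other count is treated as an ad row
print(row.find("input")["value"])  # guid123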
Example No. 5
    def parseItem(self, elem):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            raise IndexerResultParsingRowException("Unable to find title, url or date")
        entry = self.create_nzb_search_result()
        if "password protect" in title.text.lower() or "passworded" in title.text.lower():
            entry.passworded = True
        p = re.compile(r'"(.*)"')
        m = p.search(title.text)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title.text
        entry.link = url.attrib["url"]
        entry.size = int(url.attrib["length"])
        entry.indexer = self.name
        entry.category = getUnknownCategory()
        entry.details_link = elem.find("link").text
        entry.indexerguid = elem.find("guid").text[-8:]  # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
        description = elem.find("description").text
        description = urlparse.unquote(description).replace("+", " ")
        if re.compile(r"\d NFO Files").search(description):  # [x NFO Files] is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        m = self.group_pattern.search(description)
        if m:
            entry.group = m.group(1).strip()
        m = self.poster_pattern.search(description)
        if m:
            entry.poster = m.group(1).strip()
        try:
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception:
            self.error("Unable to parse pubdate %s" % pubdate.text)
            raise IndexerResultParsingRowException("Unable to parse date")
        return entry
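A sketch of the minimal item element parseItem reads, with invented values; note how the numeric id is sliced off the end of the guid URL:

import xml.etree.ElementTree as ET

item = ET.fromstring(
    '<item><title>"Some.Release.720p"</title>'
    '<enclosure url="http://example.com/get/abc" length="734003200"/>'
    '<pubDate>Mon, 01 Feb 2016 12:00:00 +0000</pubDate>'
    '<link>http://www.nzbclub.com/nzb_view58556415</link>'
    '<guid>http://www.nzbclub.com/nzb_view58556415</guid>'
    '<description>desc</description></item>')
print(item.find("guid").text[-8:])  # 58556415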
Example No. 6
 def parseItem(self, item):
     entry = self.create_nzb_search_result()
     entry.indexerguid = item.find("nzbid").text
     entry.title = item.find("release").text
     entry.group = item.find("group").text
     entry.link = item.find("getnzb").text
     entry.size = long(item.find("sizebytes").text)
     entry.epoch = long(item.find("usenetage").text)
     pubdate = arrow.get(entry.epoch)
     entry.pubdate_utc = str(pubdate)
     entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
     entry.age_days = (arrow.utcnow() - pubdate).days
     entry.age_precise = True
     entry.details_link = item.find("details").text
     entry.has_nfo = NzbSearchResult.HAS_NFO_YES if item.find("getnfo") is not None else NzbSearchResult.HAS_NFO_NO
     categoryid = item.find("categoryid").text
     if categoryid in omgwtf_to_categories:
         entry.category = getCategoryByName(omgwtf_to_categories[categoryid])
     else:
         entry.category = getUnknownCategory()
     return entry
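A sketch of the epoch-to-arrow conversion used above (the timestamp value is invented):

import arrow

pubdate = arrow.get(1454328000)                       # epoch seconds -> UTC
print(str(pubdate))                                   # 2016-02-01T12:00:00+00:00
print(pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z"))  # Mon, 01 Feb 2016 12:00:00 +0000
print((arrow.utcnow() - pubdate).days)                # age in whole days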
Example No. 7
    def parseRow(self, row):
        entry = self.create_nzb_search_result()
        title = row.find('span', attrs={'class': 's'})

        if title is None:
            self.debug("Ignored entry because it has no title")
            raise IndexerResultParsingRowException("No title found")
        title = title.text

        if "password protect" in title.lower() or "passworded" in title.lower(
        ):
            entry.passworded = True

        m = self.title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title

        entry.indexerguid = row.find("input", attrs={"type":
                                                     "checkbox"})["name"]
        entry.link = self.get_nzb_link(entry.indexerguid, None)
        info = row.find("span", attrs={"class": "d"})
        if info is None:
            self.debug("Ignored entry because it has no info")
            raise IndexerResultParsingRowException("No info found")

        collection_link = info.find("a")["href"]  # e.g. '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
        entry.details_link = "%s%s" % (self.host, collection_link)
        m = self.goup_pattern.search(collection_link)
        if m:
            entry.group = m.group(1).strip()

        m = self.poster_pattern.search(collection_link)
        if m:
            poster = m.group(1).strip()
            entry.poster = urlparse.unquote(poster).replace("+", " ")

        # Size
        m = self.size_pattern.search(info.text)
        if not m:
            self.debug("Unable to find size information in %s" % info.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "GB":
                size = size * 1024 * 1024 * 1024
            elif unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024

            entry.size = int(size)

        entry.category = getUnknownCategory()

        if self.nfo_pattern.search(info.text):  # the "<n> NFO" text is absent when there is no NFO file
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO

        # Age
        try:
            pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
            pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = False
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception as e:
            self.error("Unable to find age in %s" % row.find_all("td")[-1:][0].text)
            raise IndexerResultParsingRowException("Unable to parse age")
        return entry
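A sketch of the size-unit conversion above. The real size_pattern lives on the class; the pattern below is an invented stand-in that only mimics its named groups:

import re

size_pattern = re.compile(r"(?P<size>[\d.]+)\s*(?P<unit>[KMG]B)")
m = size_pattern.search("collection: 1.4 GB, 45 files")
size, unit = float(m.group("size")), m.group("unit")
factor = {"KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3}[unit]
print(int(size * factor))  # 1503238553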
Example No. 8
    def process_query_result(self, xml_response, searchRequest, maxResults=None):
        self.debug("Started processing results")

        if "0 results found" in xml_response:
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
        if "search to short" in xml_response:
            self.info("omgwtf says the query was too short")
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
            
        entries = []
        countRejected = 0
        try:
            tree = ET.fromstring(xml_response)
        except Exception:
            self.exception("Error parsing XML: %s..." % xml_response[:500])
            raise IndexerResultParsingException("Error parsing XML", self)
        
        if tree.tag == "xml":
            total = int(tree.find("info").find("results").text)
            current_page = int(tree.find("info").find("current_page").text)
            total_pages = int(tree.find("info").find("pages").text)
            has_more = current_page < total_pages
            for item in tree.find("search_req").findall("post"):
                entry = self.parseItem(item)
                accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)      
        elif tree.tag == "rss":
            for item in tree.find("channel").findall("item"):
                entry = self.create_nzb_search_result()
                indexerguid = item.find("guid").text
                m = self.regexGuid.match(indexerguid)
                if m:
                    entry.indexerguid = m.group(1)
                else:
                    self.warn("Unable to find GUID in " + indexerguid)
                    raise IndexerResultParsingRowException("Unable to find GUID")
                entry.title = item.find("title").text
                description = item.find("description").text
                m = self.regexGroup.match(description)
                if m:
                    entry.group = m.group(1)
                else:
                    self.warn("Unable to find group in " + description)
                    raise IndexerResultParsingRowException("Unable to find usenet group")
                entry.size = long(item.find("enclosure").attrib["length"])
                entry.pubDate = item.find("pubDate").text
                pubdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
                entry.epoch = pubdate.timestamp
                entry.pubdate_utc = str(pubdate)
                entry.age_days = (arrow.utcnow() - pubdate).days
                entry.age_precise = True
                entry.link = item.find("link").text
                entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
                categoryid = item.find("categoryid").text
                entry.details_link = self.get_details_link(entry.indexerguid)
                if categoryid in omgwtf_to_categories:
                    entry.category = getCategoryByName(omgwtf_to_categories[categoryid])
                else:
                    entry.category = getUnknownCategory()
                accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
                if accepted:
                    entries.append(entry)
                else:
                    countRejected += 1
                    self.debug("Rejected search result. Reason: %s" % reason)
            return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
        else:
            self.warn("Unknown response type: %s" % xml_response[:100])
            return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
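The branching above keys off the XML root tag; a tiny sketch with invented payloads:

import xml.etree.ElementTree as ET

print(ET.fromstring("<xml><info/></xml>").tag)     # 'xml' -> paginated API result path
print(ET.fromstring("<rss><channel/></rss>").tag)  # 'rss' -> RSS feed path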
Example No. 9
    def parseRow(self, row):
        entry = self.create_nzb_search_result()
        title = row.find('span', attrs={'class': 's'})

        if title is None:
            self.debug("Ignored entry because it has no title")
            raise IndexerResultParsingRowException("No title found")
        title = title.text

        if "password protect" in title.lower() or "passworded" in title.lower():
            entry.passworded = True

        m = self.title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title

        entry.indexerguid = row.find("input", attrs={"type": "checkbox"})["name"]
        entry.link = self.get_nzb_link(entry.indexerguid, None)
        info = row.find("span", attrs={"class": "d"})
        if info is None:
            self.debug("Ignored entry because it has no info")
            raise IndexerResultParsingRowException("No info found")

        collection_link = info.find("a")["href"]  # '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
        entry.details_link = "%s%s" % (self.host, collection_link)
        m = self.goup_pattern.search(collection_link)
        if m:
            entry.group = m.group(1).strip()

        m = self.poster_pattern.search(collection_link)
        if m:
            poster = m.group(1).strip()
            try:
                entry.poster = urlparse.unquote(poster).replace("+", " ")
            except UnicodeDecodeError:
                logger.debug("Unable to decode poster from %s" % poster)
                entry.poster = None

        # Size
        m = self.size_pattern.search(info.text)
        if not m:
            self.debug("Unable to find size information in %s" % info.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "GB":
                size = size * 1024 * 1024 * 1024
            elif unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024

            entry.size = int(size)

        entry.category = getUnknownCategory()

        if self.nfo_pattern.search(info.text):  # the "<n> NFO" text is absent when there is no NFO file
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO

        # Age
        try:
            pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
            pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
            entry.epoch = pubdate.timestamp
            self.getDates(entry, pubdate, False)
        except Exception as e:
            self.error("Unable to find age in %s" % row.find_all("td")[-1:][0].text)
            raise IndexerResultParsingRowException("Unable to parse age")
        return entry
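A sketch of the age regex and date parsing above, on an invented row text:

import re
import arrow

m = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search("posted 14-Feb-2016 in a.b.hdtv")
pubdate = arrow.get(m.group(1), "DD-MMM-YYYY")
print(str(pubdate))  # 2016-02-14T00:00:00+00:00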