def __init__(self, title=None, link=None, indexer=None, guid=None, size=None, category=None, attributes=None, epoch=None, pubDate=None, pubdate_utc=None, age_days=None, poster=None, has_nfo=HAS_NFO_YES, indexerguid=None, details_link=None, group=None, indexerscore=0, dbsearchid=None, passworded=False):
    """Container for a single search result returned by an indexer."""
    self.title = title
    self.link = link
    self.epoch = epoch
    self.pubdate_utc = pubdate_utc
    self.pubDate = pubDate
    self.age_days = age_days
    # Set to False when the age was derived from an "age" value rather than a
    # pubdate; that may influence the duplicity check
    self.age_precise = True
    self.indexer = indexer
    self.guid = guid
    # The GUID assigned by the indexer, needed later to download the actual NZB
    self.indexerguid = indexerguid
    self.size = size
    if category is None:
        self.category = categories.getUnknownCategory()
    else:
        self.category = category
    self.description = None
    self.comments = None
    self.attributes = [] if attributes is None else attributes
    self.search_types = []  # "general", "tv", "movie"
    # Some indexers only provide a feed of the latest releases, e.g. womble
    self.supports_queries = True
    self.search_ids = []  # "tvdbid", "rid", "imdbid"
    self.poster = poster
    self.has_nfo = has_nfo
    self.details_link = details_link
    self.group = group
    self.indexerscore = indexerscore
    self.dbsearchid = dbsearchid
    self.passworded = passworded
def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse the indexer's RSS XML response into an IndexerProcessingResult.

    Iterates all <item> elements, skips malformed items, extracts
    title/link/size/category/date and filters each entry through
    accept_result. Raises IndexerResultParsingException when the XML
    cannot be parsed at all.
    """
    entries = []
    countRejected = self.getRejectedCountDict()
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        # Skip items that are missing any of the essential fields
        if title is None or url is None or pubdate is None:
            continue
        entry = self.create_nzb_search_result()
        entry.title = title.text
        entry.link = url.attrib["url"]
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        p = re.compile(r"(.*)\(Size:(\d*)")
        m = p.search(elem.find("description").text)
        if m:
            entry.description = m.group(1)
            entry.size = int(m.group(2)) * 1024 * 1024  # megabyte to byte
        category = elem.find("category").text.lower()
        if category == "tv-dvdrip" or category == "tv-sd":
            entry.category = getCategoryByName("tvsd")
        # Bug fix: the original compared against the bound method object
        # "text.lower" (missing call parentheses), so "tv-hd" never matched
        elif category == "tv-x264" or category == "tv-hd":
            entry.category = getCategoryByName("tvhd")
        else:
            entry.category = getUnknownCategory()
        # 39a/The.Almighty.Johnsons.S03E06...nzb is the GUID, only the 39a doesn't work
        entry.indexerguid = elem.find("guid").text[30:]
        pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        entry.age_days = (arrow.utcnow() - pubdate).days
        entry.age = (arrow.utcnow() - pubdate).days
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    return IndexerProcessingResult(entries=entries, queries=[], total_known=True, has_more=False, total=len(entries), rejected=countRejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    """Parse the indexer's RSS XML response into an IndexerProcessingResult.

    Iterates all <item> elements, skips malformed items, extracts
    title/link/size/category/date and filters each entry through
    accept_result. Raises IndexerResultParsingException when the XML
    cannot be parsed at all.
    """
    entries = []
    countRejected = self.getRejectedCountDict()
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        # Skip items that are missing any of the essential fields
        if title is None or url is None or pubdate is None:
            continue
        entry = self.create_nzb_search_result()
        entry.title = title.text
        entry.link = url.attrib["url"]
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        p = re.compile(r"(.*)\(Size:(\d*)")
        m = p.search(elem.find("description").text)
        if m:
            entry.description = m.group(1)
            entry.size = int(m.group(2)) * 1024 * 1024  # megabyte to byte
        category = elem.find("category").text.lower()
        if category == "tv-dvdrip" or category == "tv-sd":
            entry.category = getCategoryByName("tvsd")
        # Bug fix: the original compared against the bound method object
        # "text.lower" (missing call parentheses), so "tv-hd" never matched
        elif category == "tv-x264" or category == "tv-hd":
            entry.category = getCategoryByName("tvhd")
        else:
            entry.category = getUnknownCategory()
        # 39a/The.Almighty.Johnsons.S03E06...nzb is the GUID, only the 39a doesn't work
        entry.indexerguid = elem.find("guid").text[30:]
        pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        entry.age_days = (arrow.utcnow() - pubdate).days
        entry.age = (arrow.utcnow() - pubdate).days
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    return IndexerProcessingResult(entries=entries, queries=[], total_known=True, has_more=False, total=len(entries), rejected=countRejected)
def parseRow(self, row):
    """Parse one HTML result table row into a search result.

    Raises IndexerResultParsingRowException for rows that are ads.
    """
    cells = list(row.find_all("td"))
    if len(cells) != 5:
        # Rows with a different cell count are advertisements
        raise IndexerResultParsingRowException("Ad")
    entry = self.create_nzb_search_result()
    entry.indexerguid = row.find("input")["value"]
    infotd = cells[1]
    if "password protected" in infotd.text.lower():
        entry.passworded = True
    title = re.sub(" +", " ", infotd.find("label").text.replace("\n", ""))
    m = self.title_pattern.search(title)
    entry.title = m.group(1) if m else title
    entry.title = self.cleanUpTitle(entry.title)
    info = infotd.find("div", class_="fileinfo")
    # The "1 nfo" file count is missing from the info div when there is no NFO
    if info is not None and re.compile(r"\d NFO").search(info.text):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    entry.poster = self.parse_poster(infotd)
    download_links = infotd.findAll('a', text=re.compile('Download'))
    if download_links is not None and len(download_links) == 1:
        entry.link = download_links[0]["href"]
    else:
        self.debug("Did not find link in row")
    complete = infotd.find("span", class_="complete")
    if complete:
        entry.files = complete.text[0:complete.text.find(" ")]
    entry.category = getUnknownCategory()
    entry.size = self.parse_size(cells[2])
    entry.group = cells[3].text.replace("\n", "").replace("a.b.", "alt.binaries.").strip()
    self.parse_date(cells[4], entry)
    collection_links = infotd.findAll("a", href=True, text="View collection")
    if collection_links is not None and len(collection_links) > 0:
        entry.details_link = collection_links[0].attrs["href"]
    return entry
def parseItem(self, elem):
    """Convert one RSS <item> element into a search result.

    Raises IndexerResultParsingRowException when mandatory fields are
    missing or the publication date cannot be parsed.
    """
    title = elem.find("title")
    url = elem.find("enclosure")
    pubdate = elem.find("pubDate")
    if title is None or url is None or pubdate is None:
        raise IndexerResultParsingRowException("Unable to find title, url or date")
    entry = self.create_nzb_search_result()
    lowered_title = title.text.lower()
    if "password protect" in lowered_title or "passworded" in lowered_title:
        entry.passworded = True
    m = re.compile(r'"(.*)"').search(title.text)
    entry.title = m.group(1) if m else title.text
    entry.link = url.attrib["url"]
    entry.size = int(url.attrib["length"])
    entry.indexer = self.name
    entry.category = getUnknownCategory()
    entry.details_link = elem.find("link").text
    # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
    entry.indexerguid = elem.find("guid").text[-8:]
    description = urlparse.unquote(elem.find("description").text).replace("+", " ")
    # "[x NFO Files]" is missing from the description when there is no NFO
    if re.compile(r"\d NFO Files").search(description):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    m = self.group_pattern.search(description)
    if m:
        entry.group = m.group(1).strip()
    m = self.poster_pattern.search(description)
    if m:
        entry.poster = m.group(1).strip()
    try:
        pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.age_days = (arrow.utcnow() - pubdate).days
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
    except Exception:
        self.error("Unable to parse pubdate %s" % pubdate.text)
        raise IndexerResultParsingRowException("Unable to parse date")
    return entry
def __init__(self, title=None, link=None, indexer=None, guid=None, size=None, category=None, attributes=None, epoch=None, pubDate=None, pubdate_utc=None, age_days=None, poster=None, has_nfo=HAS_NFO_YES, indexerguid=None, details_link=None, group=None, indexerscore=0, dbsearchid=None, passworded=False, downloadType="nzb"):
    """Container for a single search result returned by an indexer."""
    self.title = title
    self.link = link
    self.epoch = epoch
    self.pubdate_utc = pubdate_utc
    self.pubDate = pubDate
    self.age_days = age_days
    # Set to False when the age was derived from an "age" value rather than a
    # pubdate; that may influence the duplicity check
    self.age_precise = True
    self.indexer = indexer
    self.guid = guid
    # The GUID assigned by the indexer, needed later to download the actual NZB
    self.indexerguid = indexerguid
    self.size = size
    if category is None:
        self.category = categories.getUnknownCategory()
    else:
        self.category = category
    self.description = None
    self.comments = None
    self.attributes = [] if attributes is None else attributes
    self.search_types = []  # "general", "tv", "movie"
    # Some indexers only provide a feed of the latest releases, e.g. womble
    self.supports_queries = True
    self.search_ids = []  # "tvdbid", "rid", "imdbid"
    self.poster = poster
    self.has_nfo = has_nfo
    self.details_link = details_link
    self.group = group
    self.indexerscore = indexerscore
    self.dbsearchid = dbsearchid
    self.passworded = passworded
    self.downloadType = downloadType
def parseRow(self, row):
    """Parse one HTML result table row into a search result.

    Raises IndexerResultParsingRowException for rows that are ads.
    """
    cells = list(row.find_all("td"))
    if len(cells) != 5:
        # Rows with a different cell count are advertisements
        raise IndexerResultParsingRowException("Ad")
    entry = self.create_nzb_search_result()
    entry.indexerguid = row.find("input")["value"]
    infotd = cells[1]
    if "password protected" in infotd.text.lower():
        entry.passworded = True
    title = re.sub(" +", " ", infotd.find("label").text.replace("\n", ""))
    m = self.title_pattern.search(title)
    entry.title = m.group(1) if m else title
    entry.title = self.cleanUpTitle(entry.title)
    info = infotd.find("div", class_="fileinfo")
    # The "1 nfo" file count is missing from the info div when there is no NFO
    if info is not None and re.compile(r"\d NFO").search(info.text):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    entry.poster = self.parse_poster(infotd)
    download_links = infotd.findAll('a', text=re.compile('Download'))
    if download_links is not None and len(download_links) == 1:
        entry.link = download_links[0]["href"]
    else:
        self.debug("Did not find link in row")
    complete = infotd.find("span", class_="complete")
    if complete:
        entry.files = complete.text[0:complete.text.find(" ")]
    entry.category = getUnknownCategory()
    entry.size = self.parse_size(cells[2])
    entry.group = cells[3].text.replace("\n", "").replace("a.b.", "alt.binaries.").strip()
    self.parse_date(cells[4], entry)
    collection_links = infotd.findAll("a", href=True, text="View collection")
    if collection_links is not None and len(collection_links) > 0:
        entry.details_link = collection_links[0].attrs["href"]
    return entry
def parseItem(self, elem):
    """Convert one RSS <item> element into a search result.

    Raises IndexerResultParsingRowException when mandatory fields are
    missing or the publication date cannot be parsed.
    """
    title = elem.find("title")
    url = elem.find("enclosure")
    pubdate = elem.find("pubDate")
    if title is None or url is None or pubdate is None:
        raise IndexerResultParsingRowException("Unable to find title, url or date")
    entry = self.create_nzb_search_result()
    lowered_title = title.text.lower()
    if "password protect" in lowered_title or "passworded" in lowered_title:
        entry.passworded = True
    m = re.compile(r'"(.*)"').search(title.text)
    entry.title = m.group(1) if m else title.text
    entry.link = url.attrib["url"]
    entry.size = int(url.attrib["length"])
    entry.indexer = self.name
    entry.category = getUnknownCategory()
    entry.details_link = elem.find("link").text
    # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
    entry.indexerguid = elem.find("guid").text[-8:]
    description = urlparse.unquote(elem.find("description").text).replace("+", " ")
    # "[x NFO Files]" is missing from the description when there is no NFO
    if re.compile(r"\d NFO Files").search(description):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    m = self.group_pattern.search(description)
    if m:
        entry.group = m.group(1).strip()
    m = self.poster_pattern.search(description)
    if m:
        entry.poster = m.group(1).strip()
    try:
        pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.age_days = (arrow.utcnow() - pubdate).days
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
    except Exception:
        self.error("Unable to parse pubdate %s" % pubdate.text)
        raise IndexerResultParsingRowException("Unable to parse date")
    return entry
def parseItem(self, item):
    """Map one <post> element from the XML API response to a search result."""
    entry = self.create_nzb_search_result()
    entry.indexerguid = item.find("nzbid").text
    entry.title = item.find("release").text
    entry.group = item.find("group").text
    entry.link = item.find("getnzb").text
    entry.size = long(item.find("sizebytes").text)
    # usenetage is already a unix epoch, no date parsing needed
    entry.epoch = long(item.find("usenetage").text)
    pubdate = arrow.get(entry.epoch)
    entry.pubdate_utc = str(pubdate)
    entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
    entry.age_days = (arrow.utcnow() - pubdate).days
    entry.age_precise = True
    entry.details_link = item.find("details").text
    # A <getnfo> element is only present when an NFO file exists
    if item.find("getnfo") is not None:
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    categoryid = item.find("categoryid").text
    if categoryid in omgwtf_to_categories.keys():
        entry.category = getCategoryByName(omgwtf_to_categories[categoryid])
    else:
        entry.category = getUnknownCategory()
    return entry
def parseRow(self, row):
    """Parse one search result row from the HTML listing.

    Raises IndexerResultParsingRowException when the row is missing the
    title/info spans or when the age cannot be parsed.
    """
    entry = self.create_nzb_search_result()
    title = row.find('span', attrs={'class': 's'})
    if title is None:
        self.debug("Ignored entry because it has no title")
        raise IndexerResultParsingRowException("No title found")
    title = title.text
    if "password protect" in title.lower() or "passworded" in title.lower():
        entry.passworded = True
    m = self.title_pattern.search(title)
    if m:
        entry.title = m.group(1)
    else:
        entry.title = title
    entry.indexerguid = row.find("input", attrs={"type": "checkbox"})["name"]
    entry.link = self.get_nzb_link(entry.indexerguid, None)
    info = row.find("span", attrs={"class": "d"})
    if info is None:
        self.debug("Ignored entry because it has no info")
        raise IndexerResultParsingRowException("No info found")
    # e.g. '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
    collection_link = info.find("a")["href"]
    entry.details_link = "%s%s" % (self.host, collection_link)
    m = self.goup_pattern.search(collection_link)
    if m:
        entry.group = m.group(1).strip()
    m = self.poster_pattern.search(collection_link)
    if m:
        poster = m.group(1).strip()
        # Robustness fix, consistent with the sibling implementation of this
        # parser: unquoting a poster with invalid byte sequences raised
        # UnicodeDecodeError and aborted the whole row
        try:
            entry.poster = urlparse.unquote(poster).replace("+", " ")
        except UnicodeDecodeError:
            logger.debug("Unable to decode poster from %s" % poster)
            entry.poster = None
    # Size
    m = self.size_pattern.search(info.text)
    if not m:
        self.debug("Unable to find size information in %s" % info.text)
    else:
        size = float(m.group("size"))
        unit = m.group("unit")
        if unit == "GB":
            size = size * 1024 * 1024 * 1024
        elif unit == "KB":
            size *= 1024
        elif unit == "MB":
            size = size * 1024 * 1024
        entry.size = int(size)
    entry.category = getUnknownCategory()
    # 1 nfo file is missing if there is no NFO
    if self.nfo_pattern.search(info.text):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    # Age
    try:
        pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
        pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.age_days = (arrow.utcnow() - pubdate).days
        # Only day precision is available from the listing
        entry.age_precise = False
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
    except Exception:
        self.error("Unable to find age in %s" % row.find_all("td")[-1:][0].text)
        raise IndexerResultParsingRowException("Unable to parse age")
    return entry
def process_query_result(self, xml_response, searchRequest, maxResults=None):
    """Parse an indexer response that is either the XML API format or RSS.

    Returns an IndexerProcessingResult; raises IndexerResultParsingException
    when the payload is not parseable XML.
    """
    self.debug("Started processing results")
    if "0 results found" in xml_response:
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
    if "search to short" in xml_response:
        self.info("omgwtf says the query was too short")
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml_response)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml_response[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    if tree.tag == "xml":
        # API response: carries explicit paging info
        total = int(tree.find("info").find("results").text)
        current_page = int(tree.find("info").find("current_page").text)
        total_pages = int(tree.find("info").find("pages").text)
        has_more = current_page < total_pages
        for item in tree.find("search_req").findall("post"):
            entry = self.parseItem(item)
            accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)
    elif tree.tag == "rss":
        for item in tree.find("channel").findall("item"):
            entry = self.create_nzb_search_result()
            indexerguid = item.find("guid").text
            m = self.regexGuid.match(indexerguid)
            if m:
                entry.indexerguid = m.group(1)
            else:
                self.warn("Unable to find GUID in " + indexerguid)
                raise IndexerResultParsingRowException("Unable to find GUID")
            entry.title = item.find("title").text
            description = item.find("description").text
            m = self.regexGroup.match(description)
            if m:
                entry.group = m.group(1)
            else:
                self.warn("Unable to find group in " + description)
                raise IndexerResultParsingRowException("Unable to find usenet group")
            entry.size = long(item.find("enclosure").attrib["length"])
            entry.pubDate = item.find("pubDate").text
            pubdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            # Bug fix: was "entry.precise_date", an attribute that exists
            # nowhere else; the result class uses "age_precise"
            entry.age_precise = True
            entry.link = item.find("link").text
            entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
            categoryid = item.find("categoryid").text
            entry.details_link = self.get_details_link(entry.indexerguid)
            if categoryid in omgwtf_to_categories.keys():
                entry.category = getCategoryByName(omgwtf_to_categories[categoryid])
            else:
                entry.category = getUnknownCategory()
            accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
    else:
        self.warn("Unknown response type: %s" % xml_response[:100])
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
def parseRow(self, row):
    """Parse one search result row from the HTML listing.

    Raises IndexerResultParsingRowException when the row is missing the
    title/info spans or when the age cannot be parsed.
    """
    entry = self.create_nzb_search_result()
    title_span = row.find('span', attrs={'class': 's'})
    if title_span is None:
        self.debug("Ignored entry because it has no title")
        raise IndexerResultParsingRowException("No title found")
    raw_title = title_span.text
    lowered = raw_title.lower()
    if "password protect" in lowered or "passworded" in lowered:
        entry.passworded = True
    match = self.title_pattern.search(raw_title)
    entry.title = match.group(1) if match else raw_title
    entry.indexerguid = row.find("input", attrs={"type": "checkbox"})["name"]
    entry.link = self.get_nzb_link(entry.indexerguid, None)
    info = row.find("span", attrs={"class": "d"})
    if info is None:
        self.debug("Ignored entry because it has no info")
        raise IndexerResultParsingRowException("No info found")
    # e.g. '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
    collection_link = info.find("a")["href"]
    entry.details_link = "%s%s" % (self.host, collection_link)
    match = self.goup_pattern.search(collection_link)
    if match:
        entry.group = match.group(1).strip()
    match = self.poster_pattern.search(collection_link)
    if match:
        poster = match.group(1).strip()
        try:
            entry.poster = urlparse.unquote(poster).replace("+", " ")
        except UnicodeDecodeError:
            logger.debug("Unable to decode poster from %s" % poster)
            entry.poster = None
    # Size
    match = self.size_pattern.search(info.text)
    if match:
        size = float(match.group("size"))
        unit = match.group("unit")
        if unit == "GB":
            size = size * 1024 * 1024 * 1024
        elif unit == "KB":
            size *= 1024
        elif unit == "MB":
            size = size * 1024 * 1024
        entry.size = int(size)
    else:
        self.debug("Unable to find size information in %s" % info.text)
    entry.category = getUnknownCategory()
    # The nfo file count is only present in the info text when an NFO exists
    if self.nfo_pattern.search(info.text):
        entry.has_nfo = NzbSearchResult.HAS_NFO_YES
    else:
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
    # Age
    try:
        pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
        pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
        entry.epoch = pubdate.timestamp
        self.getDates(entry, pubdate, False)
    except Exception:
        self.error("Unable to find age in %s" % row.find_all("td")[-1:][0].text)
        raise IndexerResultParsingRowException("Unable to parse age")
    return entry