def process_query_result(self, xml_response, searchRequest, maxResults=None):
    self.debug("Started processing results")
    countRejected = self.getRejectedCountDict()
    acceptedEntries = []
    entries, total, offset = self.parseXml(xml_response, maxResults)
    for entry in entries:
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            acceptedEntries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    if total == 0 or len(acceptedEntries) == 0:
        self.info("Query returned no results")
        return IndexerProcessingResult(entries=acceptedEntries, queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
    else:
        return IndexerProcessingResult(entries=acceptedEntries, queries=[], total=total, total_known=True, has_more=offset + len(entries) < total, rejected=countRejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    self.debug("Started processing results")
    entries = []
    countRejected = self.getRejectedCountDict()
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        self.debug(xml[:500])
        raise IndexerResultParsingException("Error while parsing XML from NZBClub", self)
    for item in tree.iter('item'):
        try:
            entry = self.parseItem(item)
        except IndexerResultParsingRowException:
            continue
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing results")
    # No paging with RSS. Might need/want to change to HTML and BS
    return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    entries = []
    countRejected = self.getRejectedCountDict()
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            continue
        entry = self.create_nzb_search_result()
        entry.title = title.text
        entry.link = url.attrib["url"]
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        p = re.compile(r"(.*)\(Size:(\d*)")
        m = p.search(elem.find("description").text)
        if m:
            entry.description = m.group(1)
            entry.size = int(m.group(2)) * 1024 * 1024  # megabyte to byte
        category = elem.find("category").text.lower()
        if category == "tv-dvdrip" or category == "tv-sd":
            entry.category = getCategoryByName("tvsd")
        elif category == "tv-x264" or category == "tv-hd":
            entry.category = getCategoryByName("tvhd")
        else:
            entry.category = getUnknownCategory()
        # 39a/The.Almighty.Johnsons.S03E06.720p.BluRay.x264-YELLOWBiRD.nzb is the GUID, only the 39a doesn't work
        entry.indexerguid = elem.find("guid").text[30:]
        pubdate = arrow.get(pubdate.text, 'M/D/YYYY h:mm:ss A')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        entry.age_days = (arrow.utcnow() - pubdate).days
        entry.age = (arrow.utcnow() - pubdate).days
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    return IndexerProcessingResult(entries=entries, queries=[], total_known=True, has_more=False, total=len(entries), rejected=countRejected)
def process_query_result(self, html, searchRequest, maxResults=None):
    self.debug("Started processing results")
    entries = []
    countRejected = self.getRejectedCountDict()
    logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    # Check for the "no results" page before dereferencing the results table
    if "No results found" in soup.text:
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=self.getRejectedCountDict())
    main_table = soup.find(id="results").find('table')
    if not main_table or not main_table.find("tbody"):
        self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
        self.debug(html[:500])
        raise IndexerResultParsingException("Unable to find main table in NZBIndex page", self)
    items = main_table.find("tbody").find_all('tr')
    for row in items:
        try:
            entry = self.parseRow(row)
        except IndexerResultParsingRowException:
            continue
        accepted, reason, ri = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected[ri] += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    try:
        page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
        if len(page_links) == 0:
            total = len(entries)
            has_more = False
        else:
            pagecount = int(page_links[-2].text)  # Don't count "next"
            currentpage = int(main_table.find("tfoot").find_all("tr")[1].find("b").text)
            has_more = pagecount > currentpage
            total = self.limit * pagecount  # Good enough
    except Exception:
        self.exception("Error while trying to find page count")
        total = len(entries)
        has_more = False
    self.debug("Finished processing results")
    return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)
def process_query_result(self, xml, searchRequest, maxResults=None):
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        logger.debug(xml)
        raise IndexerResultParsingException("Error parsing XML", self)
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            continue
        entry = self.create_nzb_search_result()
        entry.title = title.text
        entry.link = url.attrib["url"]
        entry.size = int(url.attrib["length"])
        entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        entry.category = getCategoryByName("anime")
        entry.indexerguid = elem.find("guid").text
        entry.details_link = entry.link.replace("dl", "info")
        pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
        entry.epoch = pubdate.timestamp
        entry.pubdate_utc = str(pubdate)
        entry.pubDate = pubdate
        entry.age_days = (arrow.utcnow() - pubdate).days
        accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    return IndexerProcessingResult(entries=entries, queries=[], total_known=True, has_more=False, total=len(entries), rejected=countRejected)
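# --- Hedged sketch (not part of the indexer code above) ---
# Illustrates the pubDate handling these parsers share: an RFC-822 style date
# string is parsed with arrow and turned into epoch / pubdate_utc / age_days.
# The helper name and sample date are invented; ".timestamp" mirrors the older
# arrow API used here (on arrow >= 1.0 it would be the method ".timestamp()").
import arrow

def _parse_pubdate(pubdate_text):
    pubdate = arrow.get(pubdate_text, 'ddd, DD MMM YYYY HH:mm:ss Z')
    epoch = pubdate.timestamp  # property on old arrow, method on >= 1.0
    age_days = (arrow.utcnow() - pubdate).days
    return epoch, str(pubdate), age_days

# Example: _parse_pubdate("Mon, 02 May 2016 13:37:00 +0200")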
def process_query_result(self, html, searchRequest, maxResults=None):
    self.debug("Started processing results")
    logger.info("Last results count %d" % self.last_results_count)
    entries = Set([])
    countRejected = 0
    self.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    if "No results in most popular groups" in soup.text:
        logger.info("No results found for query")
        return IndexerProcessingResult(entries=[], queries=[], total_known=True, has_more=False, total=0, rejected=0)
    main_table = soup.find('table', attrs={'id': 'r2'})
    if not main_table:
        self.debug(html[:500])
        raise IndexerResultParsingException("Unable to find main table in binsearch page. This happens sometimes... :-)", self)
    items = main_table.find_all('tr')
    for row in items:
        try:
            entry = self.parseRow(row)
        except IndexerResultParsingRowException:
            continue
        accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.add(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing %d results" % len(entries))
    page_links = soup.find_all('table', attrs={'class': 'xMenuT'})[1].find_all("a")
    has_more = len(page_links) > 0 and page_links[-1].text == ">"
    total_known = False
    total = 100
    if len(page_links) == 0:
        m = re.compile(r".* (\d+)\+? records.*").search(soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
        if m:
            total = int(m.group(1))
            total_known = True
    return IndexerProcessingResult(entries=entries, queries=[], total_known=total_known, has_more=has_more, total=total, rejected=countRejected)
def process_query_result(self, html, maxResults=None):
    self.debug("Started processing results")
    entries = []
    logger.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    # Check for the "no results" page before dereferencing the results table
    if "No results found" in soup.text:
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False)
    main_table = soup.find(id="results").find('table')
    if not main_table or not main_table.find("tbody"):
        self.error("Unable to find main table in NZBIndex page: %s..." % html[:500])
        self.debug(html[:500])
        raise IndexerResultParsingException("Unable to find main table in NZBIndex page", self)
    items = main_table.find("tbody").find_all('tr')
    size_pattern = re.compile(r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
    age_pattern = re.compile(r"(?P<days1>\d+)\.(?P<days2>\d)")
    title_pattern = re.compile(r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
    for row in items:
        tds = list(row.find_all("td"))
        if len(tds) != 5:
            # advertisement
            continue
        entry = self.create_nzb_search_result()
        entry.indexerguid = row.find("input")["value"]
        infotd = tds[1]
        if "password protected" in infotd.text.lower():
            entry.passworded = True
        title = infotd.find("label").text
        title = title.replace("\n", "")
        title = re.sub(" +", "", title)
        m = title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title
        info = infotd.find("div", class_="fileinfo")
        if info is not None and re.compile(r"\d NFO").search(info.text):
            # 1 nfo file is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        poster = infotd.find("span", class_="poster").find("a")
        if poster is not None:
            poster = poster.text.replace("\n", "")
            poster = re.sub(" +", "", poster)
            entry.poster = poster.replace("(", " (").replace("<", " <").strip()
        link = infotd.findAll('a', text=re.compile('Download'))
        if link is not None and len(link) == 1:
            entry.link = link[0]["href"]
        else:
            self.debug("Did not find link in row")
        entry.category = "N/A"
        sizetd = tds[2]
        m = size_pattern.search(sizetd.text)
        if not m:
            self.debug("Unable to find size information in %s" % sizetd.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024
            elif unit == "GB":
                size = size * 1024 * 1024 * 1024
            entry.size = int(size)
        grouptd = tds[3]
        group = grouptd.text.replace("\n", "").replace("a.b.", "alt.binaries.").strip()
        entry.group = group
        agetd = tds[4]
        m = age_pattern.search(agetd.text)
        days = None
        hours = None
        if m:
            days = int(m.group("days1"))
            hours = int(m.group("days2")) * 2.4
        else:
            p = re.compile(r"(?P<hours>\d+) hours?")
            m = p.search(agetd.text)
            if m:
                days = 0
                hours = int(m.group("hours"))
        if hours is not None:
            pubdate = arrow.utcnow().replace(days=-days, hours=-1)  # hours because of timezone change below
            if hours > 0:
                pubdate = pubdate.replace(hours=-hours)
            pubdate = pubdate.to("+01:00")  # nzbindex server time, I guess?
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = True  # Precise to 2.4 hours, should be enough for duplicate detection
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        else:
            self.debug("Found no age info in %s" % str(agetd))
        collection_links = infotd.findAll("a", href=True, text="View collection")
        if collection_links is not None and len(collection_links) > 0:
            entry.details_link = collection_links[0].attrs["href"]
        accepted, reason = self.accept_result(entry)
        if accepted:
            entries.append(entry)
        else:
            self.debug("Rejected search result. Reason: %s" % reason)
    try:
        page_links = main_table.find("tfoot").find_all("tr")[1].find_all('a')
        if len(page_links) == 0:
            total = len(entries)
            has_more = False
        else:
            pagecount = int(page_links[-2].text)  # Don't count "next"
            currentpage = int(main_table.find("tfoot").find_all("tr")[1].find("b").text)
            has_more = pagecount > currentpage
            total = self.limit * pagecount  # Good enough
    except Exception:
        self.exception("Error while trying to find page count")
        total = len(entries)
        has_more = False
    self.debug("Finished processing results")
    return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more)
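# --- Hedged sketch (not part of the indexer code above) ---
# Shows how the size_pattern above turns a human-readable size string
# ("1.2 GB", "750 MB", ...) into bytes. The helper name, the UNIT_FACTORS
# table and the sample strings are illustrative only.
import re

SIZE_PATTERN = re.compile(r"(?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
UNIT_FACTORS = {"B": 1, "KB": 1024, "MB": 1024 ** 2, "GB": 1024 ** 3}

def _parse_size(text):
    m = SIZE_PATTERN.search(text)
    if not m:
        return None
    return int(float(m.group("size")) * UNIT_FACTORS[m.group("unit")])

# Example: _parse_size("size: 1.2 GB") -> 1288490188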
def process_query_result(self, xml, searchRequest, maxResults=None):
    self.debug("Started processing results")
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml[:500])
        self.debug(xml[:500])
        raise IndexerResultParsingException("Error while parsing XML from NZBClub", self)
    group_pattern = re.compile(r"Newsgroup: ?([\w@\. \(\)]+) <br />")
    poster_pattern = re.compile(r"Poster: ?([\w@\. \(\)]+) <br />")
    for elem in tree.iter('item'):
        title = elem.find("title")
        url = elem.find("enclosure")
        pubdate = elem.find("pubDate")
        if title is None or url is None or pubdate is None:
            continue
        entry = self.create_nzb_search_result()
        if "password protect" in title.text.lower() or "passworded" in title.text.lower():
            entry.passworded = True
        p = re.compile(r'"(.*)"')
        m = p.search(title.text)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title.text
        entry.link = url.attrib["url"]
        entry.size = int(url.attrib["length"])
        entry.indexer = self.name
        entry.category = "N/A"
        entry.details_link = elem.find("link").text
        # GUID looks like "http://www.nzbclub.com/nzb_view58556415" of which we only want the last part
        entry.indexerguid = elem.find("guid").text[-8:]
        description = elem.find("description").text
        description = urlparse.unquote(description).replace("+", " ")
        if re.compile(r"\d NFO Files").search(description):
            # [x NFO Files] is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        m = group_pattern.search(description)
        if m:
            entry.group = m.group(1).strip()
        m = poster_pattern.search(description)
        if m:
            entry.poster = m.group(1).strip()
        try:
            pubdate = arrow.get(pubdate.text, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception:
            self.error("Unable to parse pubdate %s" % pubdate.text)
            continue
        accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing results")
    # No paging with RSS. Might need/want to change to HTML and BS
    return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
def process_query_result(self, xml_response, searchRequest, maxResults=None):
    self.debug("Started processing results")
    if "0 results found" in xml_response:
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
    if "search to short" in xml_response:
        self.info("omgwtf says the query was too short")
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=0)
    entries = []
    countRejected = 0
    try:
        tree = ET.fromstring(xml_response)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml_response[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    if tree.tag == "xml":
        total = int(tree.find("info").find("results").text)
        current_page = int(tree.find("info").find("current_page").text)
        total_pages = int(tree.find("info").find("pages").text)
        has_more = current_page < total_pages
        for item in tree.find("search_req").findall("post"):
            entry = self.create_nzb_search_result()
            entry.indexerguid = item.find("nzbid").text
            entry.title = item.find("release").text
            entry.group = item.find("group").text
            entry.link = item.find("getnzb").text
            entry.size = long(item.find("sizebytes").text)
            entry.epoch = long(item.find("usenetage").text)
            pubdate = arrow.get(entry.epoch)
            entry.pubdate_utc = str(pubdate)
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = True
            entry.details_link = item.find("details").text
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES if item.find("getnfo") is not None else NzbSearchResult.HAS_NFO_NO
            categoryid = item.find("categoryid").text
            if categoryid in omgwtf_to_categories.keys():
                entry.category = omgwtf_to_categories[categoryid]
            else:
                entry.category = "N/A"
            entries.append(entry)
        return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=has_more, rejected=countRejected)
    elif tree.tag == "rss":
        regexGuid = re.compile(r".*\?id=(\w+)&.*")
        regexGroup = re.compile(r".*Group:<\/b> ([\w\.\-]+)<br \/>.*")
        for item in tree.find("channel").findall("item"):
            entry = self.create_nzb_search_result()
            indexerguid = item.find("guid").text
            m = regexGuid.match(indexerguid)
            if m:
                entry.indexerguid = m.group(1)
            else:
                self.warn("Unable to find GUID in " + indexerguid)
                continue
            entry.title = item.find("title").text
            description = item.find("description").text
            m = regexGroup.match(description)
            if m:
                entry.group = m.group(1)
            else:
                self.warn("Unable to find group in " + description)
                continue
            entry.size = long(item.find("enclosure").attrib["length"])
            entry.pubDate = item.find("pubDate").text
            pubdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.precise_date = True
            entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
            categoryid = item.find("categoryid").text
            if categoryid in omgwtf_to_categories.keys():
                entry.category = omgwtf_to_categories[categoryid]
            else:
                entry.category = "N/A"
            accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
            if accepted:
                entries.append(entry)
            else:
                countRejected += 1
                self.debug("Rejected search result. Reason: %s" % reason)
        return IndexerProcessingResult(entries=entries, queries=[], total=len(entries), total_known=True, has_more=False, rejected=countRejected)
    else:
        self.warn("Unknown response type: %s" % xml_response[:100])
        return IndexerProcessingResult(entries=[], queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
def process_query_result(self, xml_response, searchRequest, maxResults=None):
    self.debug("Started processing results")
    entries = []
    countRejected = 0
    grouppattern = re.compile(r"Group:</b> ?([\w\.]+)<br ?/>")
    guidpattern = re.compile(r"(.*/)?([a-zA-Z0-9@\.]+)")
    try:
        tree = ET.fromstring(xml_response)
    except Exception:
        self.exception("Error parsing XML: %s..." % xml_response[:500])
        raise IndexerResultParsingException("Error parsing XML", self)
    for item in tree.find("channel").findall("item"):
        usenetdate = None
        entry = self.create_nzb_search_result()
        # These are the values that absolutely must be contained in the response
        entry.title = item.find("title").text
        entry.link = item.find("link").text
        entry.attributes = []
        entry.pubDate = item.find("pubDate").text
        entry.indexerguid = item.find("guid").text
        entry.has_nfo = NzbSearchResult.HAS_NFO_MAYBE
        m = guidpattern.search(entry.indexerguid)
        if m:
            entry.indexerguid = m.group(2)
        description = item.find("description")
        if description is not None:
            description = description.text
            if description is not None and "Group:" in description:
                # DogNZB has the group in its description
                m = grouppattern.search(description)
                if m and m.group(1) != "not available":
                    entry.group = m.group(1)
        categories = []
        for i in item.findall("./newznab:attr", {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"}):
            attribute_name = i.attrib["name"]
            attribute_value = i.attrib["value"]
            if attribute_name == "size":
                entry.size = int(attribute_value)
            elif attribute_name == "guid":
                entry.indexerguid = attribute_value
            elif attribute_name == "category" and attribute_value != "":
                try:
                    categories.append(int(attribute_value))
                except ValueError:
                    self.error("Unable to parse category %s" % attribute_value)
            elif attribute_name == "poster":
                entry.poster = attribute_value
            elif attribute_name == "info":
                entry.details_link = attribute_value
            elif attribute_name == "password" and attribute_value != "0":
                entry.passworded = True
            elif attribute_name == "group" and attribute_value != "not available":
                entry.group = attribute_value
            elif attribute_name == "usenetdate":
                usenetdate = arrow.get(attribute_value, 'ddd, DD MMM YYYY HH:mm:ss Z')
            # Store all the extra attributes, we will return them later for external apis
            entry.attributes.append({"name": attribute_name, "value": attribute_value})
        if entry.details_link is None:
            entry.details_link = self.get_details_link(entry.indexerguid)
        if usenetdate is None:
            # Not provided by attributes, use pubDate instead
            usenetdate = arrow.get(entry.pubDate, 'ddd, DD MMM YYYY HH:mm:ss Z')
        entry.epoch = usenetdate.timestamp
        entry.pubdate_utc = str(usenetdate)
        entry.age_days = (arrow.utcnow() - usenetdate).days
        entry.precise_date = True
        # Map category. Try to find the most specific category (like 2040), then the more general one (like 2000)
        categories = sorted(categories, reverse=True)  # Sort to make the most specific category appear first
        if len(categories) > 0:
            for k, v in categories_to_newznab.items():
                for c in categories:
                    if c in v:
                        entry.category = k
                        break
        accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.append(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
        if maxResults is not None and len(entries) == maxResults:
            break
    response_total_offset = tree.find("./channel[1]/newznab:response", {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"})
    if response_total_offset is None or response_total_offset.attrib["total"] == "" or response_total_offset.attrib["offset"] == "":
        self.warn("Indexer returned a result page without total results and offset. Shame! *rings bell*")
        offset = 0
        total = len(entries)
    else:
        total = int(response_total_offset.attrib["total"])
        offset = int(response_total_offset.attrib["offset"])
    if total == 0 or len(entries) == 0:
        self.info("Query returned no results")
        return IndexerProcessingResult(entries=entries, queries=[], total=0, total_known=True, has_more=False, rejected=countRejected)
    return IndexerProcessingResult(entries=entries, queries=[], total=total, total_known=True, has_more=offset + len(entries) < total, rejected=countRejected)
def process_query_result(self, html, searchRequest, maxResults=None):
    self.debug("Started processing results")
    logger.info("Last results count %d" % self.last_results_count)
    entries = Set([])
    countRejected = 0
    soup = BeautifulSoup(html, config.settings.searching.htmlParser)
    self.debug("Using HTML parser %s" % config.settings.searching.htmlParser)
    main_table = soup.find('table', attrs={'id': 'r2'})
    if not main_table:
        self.warn("Unable to find main table in binsearch page. This just sometimes happens...")
        self.debug(html[:500])
        raise IndexerResultParsingException("Unable to find main table in binsearch page. This happens sometimes... :-)", self)
    items = main_table.find_all('tr')
    title_pattern = re.compile(r'"(.*)\.(rar|nfo|mkv|par2|001|nzb|url|zip|r[0-9]{2})"')
    size_pattern = re.compile(r"size: (?P<size>[0-9]+(\.[0-9]+)?).(?P<unit>(GB|MB|KB|B))")
    poster_pattern = re.compile(r"&p=(.*)&")
    group_pattern = re.compile(r"&g=([\w\.]*)&")
    nfo_pattern = re.compile(r"\d nfo file")
    for row in items:
        entry = self.create_nzb_search_result()
        title = row.find('span', attrs={'class': 's'})
        if title is None:
            self.debug("Ignored entry because it has no title")
            continue
        title = title.text
        if "password protect" in title.lower() or "passworded" in title.lower():
            entry.passworded = True
        m = title_pattern.search(title)
        if m:
            entry.title = m.group(1)
        else:
            entry.title = title
        entry.indexerguid = row.find("input", attrs={"type": "checkbox"})["name"]
        entry.link = "https://www.binsearch.info/fcgi/nzb.fcgi?q=%s" % entry.indexerguid
        info = row.find("span", attrs={"class": "d"})
        if info is None:
            self.debug("Ignored entry because it has no info")
            continue
        # Collection link looks like '/?b=MARVELS.AVENGERS.AGE.OF.ULTRON.3D.TOPBOT.TrueFrench.1080p.X264.A&g=alt.binaries.movies.mkv&p=Ramer%40marmer.com+%28Clown_nez%29&max=250'
        collection_link = info.find("a")["href"]
        entry.details_link = "%s%s" % (self.host, collection_link)
        m = group_pattern.search(collection_link)
        if m:
            entry.group = m.group(1).strip()
        m = poster_pattern.search(collection_link)
        if m:
            poster = m.group(1).strip()
            entry.poster = urlparse.unquote(poster).replace("+", " ")
        # Size
        m = size_pattern.search(info.text)
        if not m:
            self.debug("Unable to find size information in %s" % info.text)
        else:
            size = float(m.group("size"))
            unit = m.group("unit")
            if unit == "B":
                pass
            elif unit == "KB":
                size *= 1024
            elif unit == "MB":
                size = size * 1024 * 1024
            elif unit == "GB":
                size = size * 1024 * 1024 * 1024
            entry.size = int(size)
        entry.category = "N/A"
        if nfo_pattern.search(info.text):
            # 1 nfo file is missing if there is no NFO
            entry.has_nfo = NzbSearchResult.HAS_NFO_YES
        else:
            entry.has_nfo = NzbSearchResult.HAS_NFO_NO
        # Age
        try:
            pubdate = re.compile(r"(\d{1,2}\-\w{3}\-\d{4})").search(row.text).group(1)
            pubdate = arrow.get(pubdate, "DD-MMM-YYYY")
            entry.epoch = pubdate.timestamp
            entry.pubdate_utc = str(pubdate)
            entry.age_days = (arrow.utcnow() - pubdate).days
            entry.age_precise = False
            entry.pubDate = pubdate.format("ddd, DD MMM YYYY HH:mm:ss Z")
        except Exception:
            self.error("Unable to find age in %s" % row.find_all("td")[-1:][0].text)
            continue
        accepted, reason = self.accept_result(entry, searchRequest, self.supportedFilters)
        if accepted:
            entries.add(entry)
        else:
            countRejected += 1
            self.debug("Rejected search result. Reason: %s" % reason)
    self.debug("Finished processing %d results" % len(entries))
    page_links = soup.find_all('table', attrs={'class': 'xMenuT'})[1].find_all("a")
    has_more = len(page_links) > 0 and page_links[-1].text == ">"
    total_known = False
    total = 100
    if len(page_links) == 0:
        m = re.compile(r".* (\d+)\+? records.*").search(soup.find_all('table', attrs={'class': 'xMenuT'})[1].text)
        if m:
            total = int(m.group(1))
            total_known = True
    return IndexerProcessingResult(entries=entries, queries=[], total_known=total_known, has_more=has_more, total=total, rejected=countRejected)