def search(self, task, entry, config=None):
    """ Search for name from iptorrents """

    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})

        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
            size = re.search('^([\.\d]+) ([GMK]?)B$', size)

            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)

    return entries
def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])

    # grab links from the main post:
    link_elements = []
    log.debug('Searching %s for a tags where the text matches one of: %s',
              entry['url'], str(self.config.get('link_text_re')))
    for regexp in self.config.get('link_text_re'):
        link_elements.extend(soup.find_all('a', string=re.compile(regexp)))

    log.debug('Original urls: %s', str(entry.get('urls')))
    if entry.get('urls'):
        urls = list(entry['urls'])
    else:
        urls = []

    log.debug('Found link elements: %s', str(link_elements))
    for element in link_elements:
        if re.search('nfo1.rlsbb.(ru|com)', element['href']):
            # grab multipart links
            urls.extend(self.grab_multilinks(task, element['href']))
        else:
            urls.append(element['href'])

    # grab links from comments
    regexps = self.config.get('filehosters_re', [])
    if self.config.get('parse_comments'):
        comments = soup.find_all('div', id=re.compile("commentbody"))
        log.debug('Comment parsing enabled: found %d comments.', len(comments))
        if comments and not regexps:
            log.warn('You have enabled comment parsing but you did not define any filehosters_re '
                     'filter. You may get a lot of unwanted and potentially dangerous links '
                     'from the comments.')
        for comment in comments:
            links = comment.find_all('a')
            for link in links:
                urls.append(link['href'])

    # filter urls:
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug('Url: "%s" was discarded because it does not match any of the given '
                          'filehoster filters: %s', urls[i], str(regexps))
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')

    num_links = len(urls)
    log.debug('Original urls: %s', str(entry.get('urls')))
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])

    link_re = re.compile('rarefile\.net.*\.rar$')

    # grab links from the main entry:
    blog_entry = soup.find('div', class_="entry")
    num_links = 0
    link_list = None
    for paragraph in blog_entry.find_all('p'):
        links = paragraph.find_all('a', href=link_re)
        if len(links) > num_links:
            link_list = links
            num_links = len(links)

    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []

    if link_list is not None:
        for link in link_list:
            urls.append(normalize_unicode(link['href']))
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])

    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def search(self, task, entry, config=None):
    from flexget.utils.template import environment
    from flexget.manager import manager

    search_strings = [
        urllib.quote(normalize_unicode(s).encode("utf-8"))
        for s in entry.get("search_strings", [entry["title"]])
    ]
    rss_plugin = plugin.get_plugin_by_name("rss")
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    try:
        template = environment.from_string(rss_config["url"])
    except TemplateSyntaxError as e:
        raise plugin.PluginError("Invalid jinja template as rss url: %s" % e)
    rss_config["all_entries"] = True
    for search_string in search_strings:
        rss_config["url"] = template.render({"search_term": search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers["input"](task, rss_config)
        except plugin.PluginError as e:
            log.error("Error attempting to get rss for %s: %s", rss_config["url"], e)
        else:
            entries.update(results)
    return entries
def search(self, task, entry, config=None):
    from flexget.utils.template import environment

    search_strings = [
        quote(normalize_unicode(s).encode('utf-8'))
        for s in entry.get('search_strings', [entry['title']])
    ]
    rss_plugin = plugin.get_plugin_by_name('rss')
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    try:
        template = environment.from_string(rss_config['url'])
    except TemplateSyntaxError as e:
        raise plugin.PluginError('Invalid jinja template as rss url: %s' % e)
    rss_config['all_entries'] = True
    for search_string in search_strings:
        rss_config['url'] = template.render({'search_term': search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers['input'](task, rss_config)
        except plugin.PluginError as e:
            log.error('Error attempting to get rss for %s: %s', rss_config['url'], e)
        else:
            entries.update(results)
    return entries
def search(self, task, entry, config=None):
    """ Search for entries on SceneAccess """

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if config.has_key('gravity_multiplier'):
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare queries...
    BASE_URLS = list()
    entries = set()
    for category in self.processCategories(config):
        BASE_URLS.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in BASE_URLS:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).next
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                if size:
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                entries.add(entry)

    return entries
def search(self, entry, config):
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    name = normalize_unicode(entry["title"])
    optionlist = [
        "misc", "movies", "audio", "tv", "games", "apps", "pics", "anime",
        "comics", "books", "music video", "unclassified", "all",
    ]
    url = "http://isohunt.com/js/rss/%s?iht=%s&noSL" % (
        urllib.quote(name.encode("utf-8")),
        optionlist.index(config),
    )

    log.debug("requesting: %s" % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get("status", False)
    if status != 200:
        raise PluginWarning("Search result not 200 (OK), received %s" % status)

    ex = rss.get("bozo_exception", False)
    if ex:
        raise PluginWarning("Got bozo_exception (bad feed)")

    for item in rss.entries:
        entry = Entry()
        entry["title"] = item.title
        entry["url"] = item.link

        m = re.search(r"Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)", item.description, re.IGNORECASE)
        if not m:
            log.debug("regexp did not find seeds / peer data")
            continue
        else:
            log.debug("regexp found size(%s), Seeds(%s) and Leeches(%s)" % (m.group(1), m.group(2), m.group(3)))
            entry["content_size"] = int(m.group(1))
            entry["torrent_seeds"] = int(m.group(2))
            entry["torrent_leeches"] = int(m.group(3))
            entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning("No close matches for %s" % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get("search_sort"))
    return entries
def search(self, task, entry, config=None):
    """ Search for name from iptorrents """

    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})

        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                return
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
            size = re.search('^([\.\d]+) ([GMK]?)B$', size)

            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)

    return entries
def search(self, entry, config=None):
    from flexget.utils.template import environment
    from flexget.manager import manager

    search_strings = [
        urllib.quote(normalize_unicode(s).encode('utf-8'))
        for s in entry.get('search_strings', [entry['title']])
    ]
    rss_plugin = plugin.get_plugin_by_name('rss')
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    template = environment.from_string(rss_config['url'])
    rss_config['all_entries'] = True
    for search_string in search_strings:
        # Create a fake task to pass to the rss plugin input handler
        task = Task(manager, 'search_rss_task', config={})
        rss_config['url'] = template.render({'search_term': search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers['input'](task, rss_config)
        except plugin.PluginError as e:
            log.error('Error attempting to get rss for %s: %s', rss_config['url'], e)
        else:
            entries.update(results)
    return entries
def search(self, task, entry, config=None):
    """ Search for name from torrentleech. """

    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)

        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']

            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])

    # grab link from filehosters_re
    link_elements = []
    log.debug('Searching %s for a tags where the text matches one of: %s',
              entry['url'], str(self.config.get('filehosters_re')))
    regexps = self.config.get('filehosters_re', [])
    if self.config.get('parse'):
        link_elements = soup.find_all('div', class_=re.compile("mag_details"))
        log.debug('Parsing enabled: found %d link elements.', len(link_elements))

    log.debug('Original urls: %s', str(entry['urls']))
    if 'urls' in entry:
        urls = list(entry['urls'])
        log.debug('Original urls: %s', str(entry['urls']))
    else:
        urls = []

    log.debug('link_elements parsing enabled: found %d link_elements.', len(link_elements))
    if link_elements and not regexps:
        log.warn('Link elements were found but no filehosters_re filters are defined; '
                 'the found links will not be filtered.')
    for target in link_elements:
        links = target.find_all('a')
        for link in links:
            if re.search('novafile.com', link['href']):
                urls.append(link['href'])

    # filter urls:
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug('Url: "%s" was discarded because it does not match any of the given '
                          'filehoster filters: %s', urls[i], str(regexps))
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')

    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me', 'ch', 'in']:
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError('No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)
        rss = feedparser.parse(r.content)

        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config=None):
    """ Search for name from piratebay. """

    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes
        query = query.replace('-', ' ')
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.%s/search/%s%s' % (CUR_TLD, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD, link.get('href'))
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
            size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
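# Worked example (illustrative only, not part of the plugin): the size branches above store
# content_size as an integer number of MiB while treating the scraped figure as a decimal
# quantity of bytes. A row listed as "1.5 GiB" therefore falls into the 'G' branch and becomes
# int(1.5 * 1000 ** 3 / 1024 ** 2) == 1430.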
def search(self, task, entry, config=None):
    """ Search for name from torrentleech. """

    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)

        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']

            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None):
    """ Search for entries on SceneAccess """

    session = task.requests

    if 'sceneaccess.eu' not in session.domain_limiters:
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare queries...
    base_urls = list()
    entries = set()
    for category in self.process_categories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """ Search for name from iptorrents """

    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config.get('uid')) not in req.content:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'class': 'torrents'})

        for torrent in torrents.findAll('a', href=re.compile('\.torrent$')):
            entry = Entry()
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=torrent['href'], key=config.get('rss_key'))
            entry['title'] = torrent.findPrevious("a", attrs={'class': 't_title'}).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
            size = re.search('^([\.\d]+) ([GMK]?)B$', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'K':
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)
            entries.add(entry)

    return entries
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(name.encode('utf-8'), safe=b':/~?=&%')

    log.debug('search url: %s' % url)

    html = requests.get(url).text
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0

        # TODO: also parse content_size and peers from results
        torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                              search_sort=torrent_availability(seed, 0)))
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents
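# Worked example (illustrative only): when a search returns nothing, the fallback above strips
# everything from the last dash onward and retries, so a query like 'Some.Show.S01E01-GROUP'
# that yields no rows is re-run as 'Some.Show.S01E01'; the recursion stops once no dash is
# left in the name.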
def search(self, task, entry, config=None):
    """ Search for name from torrentday. """

    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {'cata': 'yes', 'c%s' % ','.join(str(c) for c in categories): 1, 'clear-new': 1}
    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):

        url = 'https://www.torrentday.com/browse.php'
        params['search'] = normalize_unicode(search_string).replace(':', '')
        cookies = {'uid': config['uid'], 'pass': config['passkey'], '__cfduid': config['cfduid']}

        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: %s' % e)

        soup = get_soup(page)

        for tr in soup.find_all('tr', {'class': 'browse'}):
            entry = Entry()
            # find the torrent names
            title = tr.find('a', {'class': 'torrentName'})
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'dlLinksInfo'})
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL
            torrent_url = ('https://www.torrentday.com/' + torrent_url +
                           '?torrent_pass=' + config['rss_key'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders, leechers = tr.find_all('td', {'class': ['seedersInfo', 'leechersInfo']})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            # use tr object for size
            size = tr.find('td', text=re.compile('([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search('([\.\d]+) ([TGMKk]?)B', str(size))

            entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string = clean_title(search_string)
        search_string_url_fragment = search_string
        params = {'rss': 1}
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'https://kat.cr/usearch/%s/' % quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            params['category'] = config['category']

        sorters = [{'field': 'time_add', 'sorder': 'desc'}, {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            params.update(sort)

            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url, params=params, raise_status=False)
            except RequestException as e:
                log.warning('Search resulted in: %s' % e)
                continue
            if not r.content:
                log.debug('No content returned from search.')
                continue
            elif r.status_code != 200:
                log.warning('Search returned %s response code' % r.status_code)
                continue
            rss = feedparser.parse(r.content)

            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def prepare_search_query(self, search_string):
    query = normalize_unicode(search_string)
    se = re.findall('((((|S)[\d]+(E|x)[\d]+)|(|S)[\d]+))$', query)[0][0]
    query = re.sub(se, '', query).strip()
    self.se = se
    self.query = query
    return query
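# Illustrative sketch (not part of the plugin), assuming normalize_unicode leaves plain ASCII
# input unchanged: the trailing regexp above captures the season/episode token, so
#
#     prepare_search_query('Show Name S01E02')   # -> query 'Show Name', se 'S01E02'
#     prepare_search_query('Show Name 3x07')     # -> query 'Show Name', se '3x07'
#
# Note that re.findall(...)[0] raises IndexError when the string carries no trailing
# season/episode token, so callers are assumed to pass series-style search strings.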
def search(self, task, entry, config=None):
    """ Search for entries on PublicHD """

    categories = config.get('category', 'all')
    # Ensure categories a list
    if not isinstance(categories, list):
        categories = [categories]
    # Convert named category to its respective category id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_url_fragment = '&category=%s' % urllib.quote(';'.join(str(c) for c in categories))

    base_url = 'http://publichd.se/index.php?page=torrents&active=0'

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query_url_fragment = '&search=' + urllib.quote(query.encode('utf8'))

        # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
        url = (base_url + category_url_fragment + query_url_fragment)
        log.debug('PublicHD search url: %s' % url)

        page = requests.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('a', href=re.compile('page=torrent-details')):
            entry = Entry()
            entry['title'] = result.text
            # Expand the selection to whole row
            result = result.findPrevious('tr')
            download_url = result.find('a', href=re.compile('\.torrent$'))['href']
            torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

            entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

            seeds, leeches = result.findAll('td', text=re.compile('^\d+$'))
            entry['torrent_seeds'] = int(seeds.text)
            entry['torrent_leeches'] = int(leeches.text)

            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = result.find("td", text=re.compile('(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """ Search for entries on PublicHD """

    categories = config.get('category', 'all')
    # Ensure categories a list
    if not isinstance(categories, list):
        categories = [categories]
    # Convert named category to its respective category id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_url_fragment = '&category=%s' % urllib.parse.quote(';'.join(str(c) for c in categories))

    base_url = 'http://publichd.se/index.php?page=torrents&active=0'

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query_url_fragment = '&search=' + urllib.parse.quote(query.encode('utf8'))

        # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
        url = (base_url + category_url_fragment + query_url_fragment)
        log.debug('PublicHD search url: %s' % url)

        page = requests.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('a', href=re.compile('page=torrent-details')):
            entry = Entry()
            entry['title'] = result.text
            # Expand the selection to whole row
            result = result.findPrevious('tr')
            download_url = result.find('a', href=re.compile('\.torrent$'))['href']
            torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

            entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

            seeds, leeches = result.findAll('td', text=re.compile('^\d+$'))
            entry['torrent_seeds'] = int(seeds.text)
            entry['torrent_leeches'] = int(leeches.text)

            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = result.find("td", text=re.compile('(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_string', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        try:
            opened = urllib2.urlopen(url)
        except urllib2.URLError as err:
            url = 'http://torrentz.me/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
            log.warning('torrentz.eu failed, trying torrentz.me. Error: %s' % err)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                raise plugin.PluginWarning('Error requesting URL: %s' % err)

        rss = feedparser.parse(opened)

        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s %s' % (status, opened.msg))

        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config=None):
    """ Search for name from fuzer. """

    self.rss_key = config['rss_key']
    username = config['username']
    password = hashlib.md5(config['password'].encode('utf-8')).hexdigest()

    # build the form request:
    data = {'cookieuser': '******',
            'do': 'login',
            's': '',
            'securitytoken': 'guest',
            'vb_login_username': username,
            'vb_login_password': '',
            'vb_login_md5password': password,
            'vb_login_md5password_utf': password}
    # POST the login form:
    try:
        login = requests.post('https://www.fuzer.me/login.php?do=login', data=data)
    except RequestException as e:
        raise PluginError('Could not connect to fuzer: %s' % str(e))

    login_check_phrases = ['ההתחברות נכשלה', 'banned']
    if any(phrase in login.text for phrase in login_check_phrases):
        raise PluginError('Login to Fuzer failed, check credentials')

    self.user_id = requests.cookies.get('fzr2userid')
    category = config.get('category', [0])
    # Make sure categories is a list
    if not isinstance(category, list):
        category = [category]

    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in category]
    c_list = []
    for c in categories:
        c_list.append('c{}={}'.format(quote_plus('[]'), c))

    entries = []
    if entry.get('imdb_id'):
        log.debug('imdb_id {} detected, using in search.'.format(entry['imdb_id']))
        soup = self.get_fuzer_soup(entry['imdb_id'], c_list)
        entries = self.extract_entry_from_soup(soup)
        if entries:
            for e in list(entries):
                e['imdb_id'] = entry.get('imdb_id')
    else:
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string).replace(":", "")
            text = quote_plus(query.encode('windows-1255'))
            soup = self.get_fuzer_soup(text, c_list)
            entries += self.extract_entry_from_soup(soup)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort')) if entries else []
def search(self, task, entry, config):
    """ Search for entries on RarBG """

    categories = config.get('category', 'all')
    # Ensure categories a list
    if not isinstance(categories, list):
        categories = [categories]
    # Convert named category to its respective category id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_url_fragment = ';'.join(str(c) for c in categories)

    entries = set()

    token = self.get_token()
    if not token:
        log.error('No token set. Exiting RARBG search.')
        return entries

    params = {'mode': 'search', 'token': token, 'ranked': int(config['ranked']),
              'min_seeders': config['min_seeders'], 'min_leechers': config['min_leechers'],
              'sort': config['sorted_by'], 'category': category_url_fragment, 'format': 'json'}

    for search_string in entry.get('search_strings', [entry['title']]):
        params.pop('search_string', None)
        params.pop('search_imdb', None)

        if entry.get('movie_name'):
            params['search_imdb'] = entry.get('imdb_id')
        else:
            query = normalize_unicode(search_string)
            query_url_fragment = query.encode('utf8')
            params['search_string'] = query_url_fragment
            if config['use_tvdb']:
                plugin.get_plugin_by_name('thetvdb_lookup').instance.lazy_series_lookup(entry)
                params['search_tvdb'] = entry.get('tvdb_id')
                log.debug('Using tvdb id %s' % entry.get('tvdb_id'))

        page = requests.get(self.base_url, params=params)
        log.debug('requesting: %s' % page.url)

        try:
            r = page.json()
        except ValueError:
            log.debug(page.text)
            continue

        for result in r:
            e = Entry()

            e['title'] = result.get('f')
            e['url'] = result.get('d')

            entries.add(e)

    return entries
def search(self, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string_url_fragment = search_string
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            url += '&category=%s' % config['category']

        sorters = [{'field': 'time_add', 'sorder': 'desc'}, {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            url += '&field=%(field)s&sorder=%(sorder)s' % sort

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status == 404:
                # Kat returns status code 404 when no results found for some reason...
                log.debug('No results found for search query: %s' % search_string)
                continue
            elif status not in [200, 301]:
                log.warning('Search result not 200 (OK), received %s' % status)
                continue

            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, task, entry, config=None):
    """ Search for entries on Serienjunkies """

    base_url = 'http://serienjunkies.org/search/'
    mull = {"Dauer:", "Download:", "Uploader:", u"Größe:", u"Tonhöhe:",
            "Sprache:", "Format:", "HQ-Cover:"}

    self.config = task.config.get('searchSerienjunkies') or {}
    self.config.setdefault('hoster', DEFHOS)
    self.config.setdefault('language', DEFLANG)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query_url_fragment = urllib.quote(query.encode('utf8'))
        # http://serienjunkies.org/search/QUERY
        url = (base_url + query_url_fragment)
        log.debug('Serienjunkies search url: %s' % url)

        page = requests.get(url).content
        soup = get_soup(page)

        hoster = self.config['hoster']
        if self.config['language'] == 'english':
            english = True
        else:
            english = None

        for p in soup.find_all('p'):
            entry = Entry()
            if p.strong is not None and p.strong.text not in mull:
                if english:
                    try:
                        if not p.strong.find(text=re.compile("german", flags=re.IGNORECASE)):
                            link = p.find(text=re.compile(hoster)).find_previous('a')
                            entry['title'] = p.strong.text
                            entry['url'] = link.get('href')
                            entries.add(entry)
                    except:
                        pass
                else:
                    try:
                        if p.strong.find(text=re.compile("german", flags=re.IGNORECASE)):
                            link = p.find(text=re.compile(hoster)).find_previous('a')
                            entry['title'] = p.strong.text
                            entry['url'] = link.get('href')
                            entries.add(entry)
                    except:
                        pass
    return entries
def search(self, task, entry, config):
    """Search interface"""
    self.setup(task, config)

    entries = set()
    params = self.params_from_config(config)
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        params[self._key('search')] = query
        entries.update(self.get_entries(self.search_results(params)))
    return entries
def search(self, task, entry, config=None):
    """ Search for name from piratebay. """

    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")

        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search('Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    log.error('Malformed search result? Title: "%s", No size? %s', entry['title'], size_text)

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, arg_entry, config=None):
    """ Search for name from piratebay. """

    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    for search_string in arg_entry.get('search_string', [arg_entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes
        query = query.replace('-', ' ')
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.%s/search/%s%s' % (CUR_TLD, urllib.quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = link.contents[0]
            entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD, link.get('href'))
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
            size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, entry, config):
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    name = normalize_unicode(entry['title'])
    optionlist = [
        'misc', 'movies', 'audio', 'tv', 'games', 'apps', 'pics', 'anime',
        'comics', 'books', 'music video', 'unclassified', 'all'
    ]
    url = 'http://isohunt.com/js/rss/%s?iht=%s&noSL' % (urllib.quote(name.encode('utf-8')),
                                                        optionlist.index(config))

    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link

        m = re.search(r'Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)', item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        else:
            log.debug('regexp found size(%s), Seeds(%s) and Leeches(%s)' % (m.group(1), m.group(2), m.group(3)))
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2))
            entry['torrent_leeches'] = int(m.group(3))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, task, entry, config=None):
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

            entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)
    return entries
def search(self, task, entry, config=None):
    if not config:
        log.debug('Divxatope disabled')
        return set()
    log.debug('Search DivxATope')
    url_search = 'http://divxatope1.com/buscar/descargas'
    results = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = re.sub(' \(\d\d\d\d\)$', '', query)
        log.debug('Searching DivxATope %s' % query)
        query = query.encode('utf8', 'ignore')
        data = {'q': query}
        try:
            response = task.requests.post(url_search, data=data)
        except requests.RequestException as e:
            log.error('Error searching DivxATope: %s' % e)
            return
        content = response.content

        soup = get_soup(content)
        if 'divxatope1.com' in url_search:
            soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
        else:
            soup2 = soup.find('ul', attrs={'class': 'peliculas-box'})
        children = soup2.findAll('a', href=True)
        for child in children:
            entry = Entry()
            entry['url'] = child['href']
            entry_title = child.find('h2')
            if entry_title is None:
                continue
            entry_title = entry_title.contents
            if not entry_title:
                continue
            else:
                entry_title = entry_title[0]

            quality_lan = child.find('strong')
            if quality_lan is None:
                continue
            quality_lan = quality_lan.contents

            if len(quality_lan) > 2:
                if isinstance(quality_lan[0], Tag):
                    entry_quality_lan = quality_lan[1]
                else:
                    entry_quality_lan = quality_lan[0] + ' ' + quality_lan[2]
            elif len(quality_lan) == 2:
                entry_quality_lan = quality_lan[1]
            entry['title'] = entry_title + ' ' + entry_quality_lan
            results.add(entry)
    log.debug('Finish search DivxATope with %d entries' % len(results))
    return results
def search(self, task, entry, config=None):
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)
    return entries
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = "http://www.newtorrents.info/search/%s" % urllib.quote(name.encode("utf-8"), safe=b":/~?=&%")
    log.debug("search url: %s" % url)
    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r"(</SCR.*?)...(.*?IPT>)", r"\1\2", html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all("a", attrs={"href": re.compile("down.php")}):
        torrent_url = "http://www.newtorrents.info%s" % link.get("href")
        release_name = link.parent.next.get("title")
        # quick dirty hack
        seed = link.find_next("td", attrs={"class": re.compile("s")}).renderContents()
        if seed == "n/a":
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning("Error converting seed value (%s) from newtorrents to integer." % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(
            Entry(title=release_name, url=torrent_url, torrent_seeds=seed, search_sort=torrent_availability(seed, 0))
        )
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get("search_sort", 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind("-")
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug("found only one matching search result.")
        else:
            log.debug("search result contains multiple matches, sorted %s by most seeders" % torrents)
        return torrents
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me', 'ch', 'in']:
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')
        if not r.content.strip():
            raise plugin.PluginError('No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)
        rss = feedparser.parse(r.content)
        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')
        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)
    log.debug('Search got %d results' % len(entries))
    return entries
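# Several of the torrentz variants in this file recover size/seed/peer/hash data by
# running the same regular expression over the RSS item description. A minimal,
# self-contained sketch of that parsing step follows; the sample description string
# in the usage note is hypothetical and only illustrates the expected format.
import re

TORRENTZ_DESC_RE = re.compile(r'Size: (\d+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)', re.IGNORECASE)

def parse_torrentz_description(description):
    """Return (size_mb, seeds, peers, info_hash) or None if the description does not match."""
    m = TORRENTZ_DESC_RE.search(description)
    if not m:
        return None
    return (int(m.group(1)),
            int(m.group(2).replace(',', '')),
            int(m.group(3).replace(',', '')),
            m.group(4).upper())

# Hypothetical usage:
# parse_torrentz_description('Size: 700 Mb Seeds: 1,234 Peers: 56 Hash: abcdef0123456789')
# -> (700, 1234, 56, 'ABCDEF0123456789')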
def search(self, entry, config=None):
    if config:
        feed = REPUTATIONS[config]
    else:
        feed = REPUTATIONS['good']
    query = normalize_unicode(entry['title'])
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []
    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)
    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')
    for item in rss.entries:
        m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                      item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link
        entry['content_size'] = int(m.group(1))
        entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
        entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
        entry['torrent_info_hash'] = m.group(4).upper()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)
    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % query, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config=None): """ Search for name from piratebay. """ if not isinstance(config, dict): config = {} sort = SORT.get(config.get("sort_by", "seeds")) if config.get("sort_reverse"): sort += 1 if isinstance(config.get("category"), int): category = config["category"] else: category = CATEGORIES.get(config.get("category", "all")) filter_url = "/0/%d/%d" % (sort, category) entries = set() for search_string in entry.get("search_strings", [entry["title"]]): query = normalize_unicode(search_string) # TPB search doesn't like dashes query = query.replace("-", " ") # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand url = "http://thepiratebay.%s/search/%s%s" % (CUR_TLD, quote(query.encode("utf-8")), filter_url) log.debug("Using %s as piratebay search url" % url) page = requests.get(url).content soup = get_soup(page) for link in soup.find_all("a", attrs={"class": "detLink"}): entry = Entry() entry["title"] = self.extract_title(link) if not entry["title"]: log.error("Malformed search result. No title or url found. Skipping.") continue entry["url"] = "http://thepiratebay.%s%s" % (CUR_TLD, link.get("href")) tds = link.parent.parent.parent.find_all("td") entry["torrent_seeds"] = int(tds[-2].contents[0]) entry["torrent_leeches"] = int(tds[-1].contents[0]) entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"]) # Parse content_size size = link.find_next(attrs={"class": "detDesc"}).contents[0] size = re.search("Size ([\.\d]+)\xa0([GMK])iB", size) if size: if size.group(2) == "G": entry["content_size"] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2) elif size.group(2) == "M": entry["content_size"] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2) else: entry["content_size"] = int(float(size.group(1)) * 1000 / 1024 ** 2) entries.add(entry) return sorted(entries, reverse=True, key=lambda x: x.get("search_sort"))
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_string', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        try:
            opened = urllib2.urlopen(url)
        except urllib2.URLError as err:
            url = 'http://torrentz.me/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
            log.warning('torrentz.eu failed, trying torrentz.me. Error: %s' % err)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                raise plugin.PluginWarning('Error requesting URL: %s' % err)
        rss = feedparser.parse(opened)
        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s %s' % (status, opened.msg))
        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')
        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)
    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_string', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me']:
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, urllib.quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = requests.get(url)
                break
            except requests.RequestException as err:
                log.warning('torrentz.%s failed. Error: %s' % (domain, err))
        else:
            raise plugin.PluginWarning('Error getting torrentz search results')
        rss = feedparser.parse(r.content)
        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')
        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)
    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config=None):
    from flexget.utils.template import environment
    from flexget.manager import manager
    query = entry['title']
    search_string = urllib.quote(normalize_unicode(query).encode('utf-8'))
    rss_plugin = get_plugin_by_name('rss')
    # Create a fake task to pass to the rss plugin input handler
    task = Task(manager, 'search_rss_task', {})
    # Use a copy of the config, so we don't overwrite jinja url when filling in search term
    config = rss_plugin.instance.build_config(config).copy()
    template = environment.from_string(config['url'])
    config['url'] = template.render({'search_term': search_string})
    config['all_entries'] = True
    # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
    return rss_plugin.phase_handlers['input'](task, config)
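# The rss-backed search above injects the URL-quoted search string into the configured
# feed URL by rendering it as a template, so the config URL can carry a
# `{{ search_term }}` placeholder. Below is a minimal sketch of that substitution using
# plain jinja2 instead of FlexGet's template environment; the example URL is an
# illustrative assumption, not the plugin's actual config.
from jinja2 import Template

def render_search_url(url_template, search_term):
    """Fill a jinja-style search URL template with an already URL-quoted term."""
    return Template(url_template).render(search_term=search_term)

# Hypothetical usage:
# render_search_url('http://example.com/rss?q={{ search_term }}', 'some%20show')
# -> 'http://example.com/rss?q=some%20show'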
def search(self, task, entry, config=None): """ Search for entries on Serienjunkies """ base_url = 'http://serienjunkies.org/search/' mull = {"Dauer:", "Download:", "Uploader:", u"Größe:", u"Tonhöhe:", "Sprache:", "Format:", "HQ-Cover:"} self.config = task.config.get('searchSerienjunkies') or {} self.config.setdefault('hoster', DEFHOS) self.config.setdefault('language', DEFLANG) entries = set() for search_string in entry.get('search_strings', [entry['title']]): query = normalize_unicode(search_string) query_url_fragment = urllib.quote(query.encode('utf8')) # http://serienjunkies.org/search/QUERY url = (base_url + query_url_fragment) log.debug('Serienjunkies search url: %s' % url) page = requests.get(url).content soup = get_soup(page) hoster = self.config['hoster'] if self.config['language'] == 'english': english = True else: english = None for p in soup.find_all('p'): entry = Entry() if p.strong is not None and p.strong.text not in mull: if english: try: if not p.strong.find(text=re.compile("german", flags=re.IGNORECASE)): link = p.find(text=re.compile(hoster)).find_previous('a') entry['title'] = p.strong.text entry['url'] = link.get('href') entries.add(entry) except: pass else: try: if p.strong.find(text=re.compile("german", flags=re.IGNORECASE)): link = p.find(text=re.compile(hoster)).find_previous('a') entry['title'] = p.strong.text entry['url'] = link.get('href') entries.add(entry) except: pass return entries
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name.encode('utf-8'), safe=':/~?=&%')
    log.debug('search url: %s' % url)
    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                              search_sort=torrent_availability(seed, 0)))
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            raise PluginWarning('No matches for %s' % name, log, log_once=True)
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents
def search(self, entry, config=None): config = self.process_config(config) feed = REPUTATIONS[config["reputation"]] entries = set() for search_string in entry.get("search_strings", [entry["title"]]): query = normalize_unicode(search_string + config.get("extra_terms", "")) for domain in ["eu", "me"]: # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand url = "http://torrentz.%s/%s?q=%s" % (domain, feed, urllib.quote(query.encode("utf-8"))) log.debug("requesting: %s" % url) try: r = requests.get(url, headers={"User-Agent": "FlexGet/%s" % flexget.__version__}) break except requests.RequestException as err: log.warning("torrentz.%s failed. Error: %s" % (domain, err)) else: raise plugin.PluginError("Error getting torrentz search results") if not r.content.strip(): raise plugin.PluginError("No data from %s. Maybe torrentz is blocking the FlexGet User-Agent" % url) rss = feedparser.parse(r.content) if rss.get("bozo_exception"): raise plugin.PluginError("Got bozo_exception (bad rss feed)") for item in rss.entries: m = re.search( r"Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)", item.description, re.IGNORECASE, ) if not m: log.debug("regexp did not find seeds / peer data") continue entry = Entry() entry["title"] = item.title entry["url"] = item.link entry["content_size"] = int(m.group(1)) entry["torrent_seeds"] = int(m.group(2).replace(",", "")) entry["torrent_leeches"] = int(m.group(3).replace(",", "")) entry["torrent_info_hash"] = m.group(4).upper() entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"]) entries.add(entry) log.debug("Search got %d results" % len(entries)) return entries
def search(self, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        if config.get('verified'):
            search_string += ' verified:1'
        url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            url += '&category=%s' % config['category']
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s' % status)
        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            if not item.get('enclosures'):
                log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                continue
            entry['url'] = item.enclosures[0]['url']
            entry['torrent_seeds'] = int(item.torrent_seeds)
            entry['torrent_leeches'] = int(item.torrent_peers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
            entry['torrent_info_hash'] = item.torrent_infohash
            entries.add(entry)
    return entries
def url_rewrite(self, task, entry):
    try:
        page = task.requests.get(entry['url'])
    except RequestException as e:
        raise UrlRewritingError(str(e))
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))
    link_elements = soup.find_all('pre', class_='links')
    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []
    for element in link_elements:
        urls.extend(element.text.splitlines())
    regexps = self.config.get('filehosters_re', [])
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug('Url: "%s" does not match any of the given filehoster filters: %s',
                          urls[i], str(regexps))
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def search(self, task, entry, config=None):
    if not config:
        log.debug('NewPCT disabled')
        return set()
    log.debug('Search NewPCT')
    url_search = 'http://newpct1.com/buscar'
    results = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = re.sub(r' \(\d\d\d\d\)$', '', query)
        log.debug('Searching NewPCT %s', query)
        query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
        data = {'q': query}
        try:
            response = task.requests.post(url_search, data=data)
        except requests.RequestException as e:
            log.error('Error searching NewPCT: %s', e)
            return results
        content = response.content
        soup = get_soup(content)
        soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
        children = soup2.findAll('a', href=True)
        for child in children:
            entry = Entry()
            entry['url'] = child['href']
            entry_title = child.find('h2')
            if entry_title is None:
                log.debug('Ignore empty entry')
                continue
            entry_title = entry_title.text
            if not entry_title:
                continue
            try:
                entry_quality_lan = re.search(r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title).group(1)
            except AttributeError:
                log.debug('Quality not found')
                continue
            entry_title = re.sub(r' \[.+]$', '', entry_title)
            entry['title'] = entry_title + ' ' + entry_quality_lan
            results.add(entry)
    log.debug('Finish search NewPCT with %d entries', len(results))
    return results
def search(self, task, entry, config=None): """ Search for name from fuzer. """ self.rss_key = config['rss_key'] self.user_id = config['user_id'] self.cookies = { 'fzr2lastactivity': '0', 'fzr2lastvisit': '', 'fzr2password': config['cookie_password'], 'fzr2sessionhash': '', 'fzr2userid': str(self.user_id) } category = config.get('category', [0]) # Make sure categories is a list if not isinstance(category, list): category = [category] # If there are any text categories, turn them into their id number categories = [ c if isinstance(c, int) else CATEGORIES[c] for c in category ] c_list = ['c{}={}'.format(quote_plus('[]'), c) for c in categories] entries = [] if entry.get('imdb_id'): log.debug("imdb_id '%s' detected, using in search.", entry['imdb_id']) soup = self.get_fuzer_soup(entry['imdb_id'], c_list) entries = self.extract_entry_from_soup(soup) if entries: for e in list(entries): e['imdb_id'] = entry.get('imdb_id') else: for search_string in entry.get('search_strings', [entry['title']]): query = normalize_unicode(search_string).replace(":", "") text = quote_plus(query.encode('windows-1255')) soup = self.get_fuzer_soup(text, c_list) entries += self.extract_entry_from_soup(soup) return sorted(entries, reverse=True, key=lambda x: x.get('search_sort')) if entries else []
def search(self, task, entry, config=None):
    entries = set()
    search_strings = [normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])]
    for search_string in search_strings:
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % urllib.quote(search_string.encode('utf-8'))
        log.debug('requesting: %s' % url)
        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
        if not data['status'] == 'ok':
            raise plugin.PluginError('failed to query YTS')
        try:
            if data['data']['movie_count'] > 0:
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        entry['content_size'] = parse_filesize(str(torrent['size_bytes']) + "b")
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')
    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config=None):
    if not isinstance(config, dict):
        config = {}
    category = CATEGORIES.get(config.get('category', 'all'), None)
    category_query = '&cid=%d' % category if category else ''
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        search_query = '&search=%s' % quote(query.encode('utf-8'))
        url = 'http://extratorrent.cc/rss.xml?type=search%s%s' % (category_query, search_query)
        log.debug('Using %s as extratorrent search url' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if not status or status >= 400:
            continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(item.size) / 1024 / 1024
            entry['torrent_info_hash'] = item.info_hash
            if isinstance(item.seeders, int):
                entry['torrent_seeds'] = int(item.seeders)
            if isinstance(item.leechers, int):
                entry['torrent_leeches'] = int(item.leechers)
            entries.add(entry)
    return entries
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')), CATEGORIES[config['category']], FILTERS.index(config['filter']))
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if status >= 400:
            continue
        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)
            entries.add(entry)
    return entries
def search(self, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'http://www.nyaa.se/?page=rss&cats=%s&filter=%s&term=%s' % (
            CATEGORIES[config['category']], FILTERS.index(config['filter']), urllib.quote(name.encode('utf-8')))
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if status >= 400:
            continue
        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            # TODO: parse seeds, leeches and size from the feed items
            # entry['torrent_seeds'] = int(item.seeds)
            # entry['torrent_leeches'] = int(item.leechs)
            # entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # entry['content_size'] = int(item.size) / 1024 / 1024
            entries.add(entry)
    return entries