def create_entries(self, soup, imdb_id=None):
    entries = []
    links = soup.findAll('a', attrs={'href': re.compile(r'download\.php\?torrent=\d+')})
    rows = [l.find_parent('tr') for l in links]
    for row in rows:
        entry = Entry()
        entry['title'] = row.find('a', attrs={'href': re.compile(r'detail\.php\?id')}).text
        dl_href = row.find('a', attrs={'href': re.compile(r'download\.php\?torrent=\d+')}).get('href')
        entry['url'] = 'http://piratethenet.org' + dl_href
        entry['torrent_seeds'] = int(row.find(title='Number of Seeders').text)
        entry['torrent_leeches'] = int(row.find(title='Number of Leechers').text)
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entry['content_size'] = parse_filesize(str(row.find(title='Torrent size').text), si=False)
        if imdb_id:
            entry['imdb_id'] = imdb_id
        entries.append(entry)
    return entries
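# Every variant in this collection ranks results with
# torrent_availability(seeds, leeches) before sorting on 'search_sort'.
# A minimal sketch of the helper's assumed contract follows; the
# seeds-count-double weighting matches what FlexGet's utility of the same
# name has historically done, but treat the exact formula as an assumption
# and verify against the version you actually import.
def torrent_availability(seeds, leeches):
    """Return a sortable availability score; seeds are weighted double."""
    return seeds * 2 + leeches

# e.g. torrent_availability(50, 10) -> 110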
def search(self, query, comparator=StringComparator(), config=None):
    """Search for name from piratebay."""
    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    comparator.set_seq1(query)
    query = comparator.search_string()
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = 'http://thepiratebay.se/search/' + urllib.quote(query.encode('utf-8')) + filter_url
    log.debug('Using %s as piratebay search url' % url)
    page = requests.get(url).content
    soup = get_soup(page)
    entries = []
    for link in soup.find_all('a', attrs={'class': 'detLink'}):
        comparator.set_seq2(link.contents[0])
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        entry = Entry()
        entry['title'] = link.contents[0]
        entry['url'] = 'http://thepiratebay.se' + link.get('href')
        tds = link.parent.parent.parent.find_all('td')
        entry['torrent_seeds'] = int(tds[-2].contents[0])
        entry['torrent_leeches'] = int(tds[-1].contents[0])
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        # Parse content_size
        size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
        size = re.search(r'Size ([\.\d]+)\xa0([GMK])iB', size)
        if size:
            if size.group(2) == 'G':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            else:
                entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
        entries.append(entry)

    if not entries:
        dashindex = query.rfind('-')
        if dashindex != -1:
            return self.search(query[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No close matches for %s' % query, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def extract_entry_from_soup(self, soup):
    table = soup.find('div', {'id': 'main_table'}).find('table', {'class': 'table_info'})
    if len(table.find_all('tr')) == 1:
        log.debug('No search results were returned, continuing')
        return []

    entries = []
    for tr in table.find_all("tr"):
        if not tr.get('class') or 'colhead_dark' in tr.get('class'):
            continue
        name = tr.find('div', {'class': 'main_title'}).find('a').text
        torrent_name = re.search('\\r\\n(.*)',
                                 tr.find('div', {'style': 'float: right;'}).find('a')['title']).group(1)
        attachment_link = tr.find('div', {'style': 'float: right;'}).find('a')['href']
        attachment_id = re.search(r'attachmentid=(\d+)', attachment_link).group(1)
        raw_size = tr.find_all('td', {'class': 'inline_info'})[0].text.strip()
        seeders = int(tr.find_all('td', {'class': 'inline_info'})[2].text)
        leechers = int(tr.find_all('td', {'class': 'inline_info'})[3].text)

        e = Entry()
        e['title'] = name
        final_url = 'https://www.fuzer.me/rss/torrent.php/{}/{}/{}/{}'.format(
            attachment_id, self.user_id, self.rss_key, torrent_name)
        log.debug('RSS-ified download link: %s' % final_url)
        e['url'] = final_url
        e['torrent_seeds'] = seeders
        e['torrent_leeches'] = leechers
        e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])
        size = re.search(r'(\d+.?\d+)([TGMK]?)B', raw_size)
        e['content_size'] = parse_filesize(size.group(0))
        entries.append(e)
    return entries
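# Several of the newer variants delegate size parsing to parse_filesize
# rather than the inline 1000 ** n / 1024 ** 2 arithmetic used elsewhere in
# this collection. The sketch below captures the contract those call sites
# assume (human-readable size string in, mebibyte count out, with `si`
# selecting 1000- vs 1024-based prefixes); it is an approximation, not the
# real FlexGet utility, which handles more edge cases.
import re

def parse_filesize(text_size, si=True):
    """Parse a string like '1.4 GB' or '700 MiB' into mebibytes (float)."""
    prefixes = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
    match = re.match(r'([.,\d]+)\s*([kmgtp]?)i?b', text_size.strip().lower())
    if not match:
        raise ValueError('%s does not look like a file size' % text_size)
    amount = float(match.group(1).replace(',', ''))
    base = 1000 if si else 1024
    return amount * base ** prefixes[match.group(2)] / 1024 ** 2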
def search(self, entry, config):
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    name = normalize_unicode(entry["title"])
    optionlist = [
        "misc", "movies", "audio", "tv", "games", "apps", "pics",
        "anime", "comics", "books", "music video", "unclassified", "all",
    ]
    url = "http://isohunt.com/js/rss/%s?iht=%s&noSL" % (
        urllib.quote(name.encode("utf-8")), optionlist.index(config))
    log.debug("requesting: %s" % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get("status", False)
    if status != 200:
        raise PluginWarning("Search result not 200 (OK), received %s" % status)
    ex = rss.get("bozo_exception", False)
    if ex:
        raise PluginWarning("Got bozo_exception (bad feed)")

    for item in rss.entries:
        entry = Entry()
        entry["title"] = item.title
        entry["url"] = item.link
        m = re.search(r"Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)", item.description, re.IGNORECASE)
        if not m:
            log.debug("regexp did not find seeds / peer data")
            continue
        else:
            log.debug("regexp found size(%s), Seeds(%s) and Leeches(%s)" % (m.group(1), m.group(2), m.group(3)))
            entry["content_size"] = int(m.group(1))
            entry["torrent_seeds"] = int(m.group(2))
            entry["torrent_leeches"] = int(m.group(3))
            entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning("No close matches for %s" % name, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get("search_sort"))
    return entries
def search(self, task, entry, config=None):
    """Search for name from iptorrents"""
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                # no results for this search string; return what we have so
                # far rather than a bare `return`, which hands the caller None
                return entries
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """Search for name from iptorrents"""
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)

    return entries
def search(self, entry, config):
    url = "https://tehconnection.eu/torrents.php?searchstr=%s" % entry.get("imdb_id")
    page = urlopener(url, log)
    soup = get_soup(page)
    results = set()
    for row in soup.find_all("tr", class_="group_torrent"):
        link = row.find(title="Download")
        info = row.find(colspan="1").contents[3].contents[0].strip()
        seeders = int(row.find_all("td")[6].contents[0].strip())
        leechers = int(row.find_all("td")[7].contents[0].strip())

        result = Entry()
        result["title"] = entry.get("title") + " / " + info
        result["imdb_id"] = entry.get("imdb_id")
        result["url"] = "https://tehconnection.eu" + link.get("href")
        result["torrent_seeds"] = seeders
        result["torrent_leeches"] = leechers
        result["search_sort"] = torrent_availability(result['torrent_seeds'], result['torrent_leeches'])

        results.add(result)
    return results
def create_entries(self, soup, passkey=None, imdb_id=None):
    entries = []
    links = soup.findAll('a', attrs={'href': re.compile(r'download\.php\?torrent=\d+')})
    rows = [l.find_parent('tr') for l in links]
    for row in rows:
        entry = Entry()
        entry['title'] = row.find('a', attrs={'href': re.compile(r'detail\.php\?id')}).text
        dl_href = row.find('a', attrs={'href': re.compile(r'download\.php\?torrent=\d+')}).get('href')
        entry['url'] = 'http://piratethenet.org/' + dl_href + '&passkey=' + passkey
        entry['torrent_seeds'] = int(row.find(title='Number of Seeders').text)
        entry['torrent_leeches'] = int(row.find(title='Number of Leechers').text)
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        size, unit = row.find(title='Torrent size').text.split(' ')
        if unit == 'GB':
            entry['content_size'] = int(float(size) * 1024)
        elif unit == 'MB':
            entry['content_size'] = int(float(size))
        elif unit == 'KB':
            entry['content_size'] = int(float(size) / 1024)
        if imdb_id:
            entry['imdb_id'] = imdb_id
        entries.append(entry)
    return entries
def search(self, query, comparator=StringComparator(), config=None):
    if config:
        feed = REPUTATIONS[config]
    else:
        feed = REPUTATIONS['good']
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    comparator.set_seq1(query)
    query = comparator.search_string()
    url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)
    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # assign confidence score of how close this link is to the name you're looking for. .6 and above is "close"
        comparator.set_seq2(item.title)
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                      item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link
        entry['content_size'] = int(m.group(1))
        entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
        entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
        entry['torrent_info_hash'] = m.group(4).upper()
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % query, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config):
    api_key = config
    searches = entry.get('search_strings', [entry['title']])
    if 'series_name' in entry:
        search = {'category': 'Episode'}
        if 'tvdb_id' in entry:
            search['tvdb'] = entry['tvdb_id']
        elif 'tvrage_id' in entry:
            search['tvrage'] = entry['tvrage_id']
        else:
            search['series'] = entry['series_name']
        if 'series_id' in entry:
            # BTN wants an ep style identifier even for sequence shows
            if entry.get('series_id_type') == 'sequence':
                search['name'] = 'S01E%02d' % entry['series_id']
            else:
                search['name'] = entry['series_id'] + '%'  # added wildcard search for better results.
        searches = [search]
        # If searching by series name ending in a parenthetical, try again without it if there are no results.
        if search.get('series') and search['series'].endswith(')'):
            match = re.match(r'(.+)\([^\(\)]+\)$', search['series'])
            if match:
                searches.append(dict(search, series=match.group(1).strip()))

    results = set()
    for search in searches:
        data = json.dumps({'method': 'getTorrents', 'params': [api_key, search], 'id': 1})
        try:
            r = task.requests.post('http://api.btnapps.net/', data=data,
                                   headers={'Content-type': 'application/json'})
        except requests.RequestException as e:
            log.error('Error searching btn: %s' % e)
            continue
        content = r.json()
        if not content or not content['result']:
            log.debug('No results from btn')
            if content and content.get('error'):
                log.error('Error searching btn: %s' % content['error'].get('message', content['error']))
            continue
        if 'torrents' in content['result']:
            for item in content['result']['torrents'].itervalues():
                entry = Entry()
                entry['title'] = item['ReleaseName']
                entry['title'] += ' '.join(['', item['Resolution'], item['Source'], item['Codec']])
                entry['url'] = item['DownloadURL']
                entry['torrent_seeds'] = int(item['Seeders'])
                entry['torrent_leeches'] = int(item['Leechers'])
                entry['torrent_info_hash'] = item['InfoHash']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                if item['TvdbID'] and int(item['TvdbID']):
                    entry['tvdb_id'] = int(item['TvdbID'])
                if item['TvrageID'] and int(item['TvrageID']):
                    entry['tvrage_id'] = int(item['TvrageID'])
                results.add(entry)
            # Don't continue searching if this search yielded results
            break
    return results
def search(self, task, entry, config=None):
    """Search for entries on SceneAccess"""
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare queries...
    BASE_URLS = list()
    entries = set()
    for category in self.processCategories(config):
        BASE_URLS.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in BASE_URLS:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                # convert tag text to ints so torrent_availability gets numbers
                entry['torrent_seeds'] = int(result.find('td', attrs={'class': 'ttr_seeders'}).text)
                entry['torrent_leeches'] = int(result.find('td', attrs={'class': 'ttr_leechers'}).text)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).next
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                if size:
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                entries.add(entry)

    return entries
def search(self, entry, config):
    api_key = config
    searches = entry.get('search_strings', [entry['title']])
    if 'series_name' in entry:
        search = {'series': entry['series_name']}
        if 'series_id' in entry:
            search['name'] = entry['series_id']
        searches = [search]

    results = []
    for search in searches:
        data = json.dumps({'method': 'getTorrents', 'params': [api_key, search], 'id': 1})
        try:
            r = session.post('http://api.btnapps.net/', data=data,
                             headers={'Content-type': 'application/json'})
        except requests.RequestException as e:
            log.error('Error searching btn: %s' % e)
            continue
        content = r.json()
        if content['result']['results']:
            for item in content['result']['torrents'].itervalues():
                if item['Category'] != 'Episode':
                    continue
                entry = Entry()
                entry['title'] = item['ReleaseName']
                entry['url'] = item['DownloadURL']
                entry['torrent_seeds'] = int(item['Seeders'])
                entry['torrent_leeches'] = int(item['Leechers'])
                entry['torrent_info_hash'] = item['InfoHash']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                if item['TvdbID']:
                    entry['tvdb_id'] = int(item['TvdbID'])
                results.append(entry)
    return results
def search(self, task, entry, config=None):
    """Search for name from piratebay."""
    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes
        query = query.replace('-', ' ')
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.%s/search/%s%s' % (CUR_TLD, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD, link.get('href'))
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
            size = re.search(r'Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None):
    """Search for name from torrentleech."""
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        # %-format the message; passing str(e) as a second positional argument
        # (logger style) would leave the placeholder unfilled
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)

        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']

            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me', 'ch', 'in']:
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError('No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)

        rss = feedparser.parse(r.content)
        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, task, entry, config):
    """Search for entries on 1337x"""
    if not isinstance(config, dict):
        config = {}

    order_by = ''
    sort_order = ''
    if isinstance(config.get('order_by'), str):
        if config['order_by'] != 'leechers':
            order_by = '/{0}/desc'.format(config['order_by'])
            sort_order = 'sort-'

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = '{0}search/{1}{2}/1/'.format(sort_order, quote(search_string.encode('utf8')), order_by)
        log.debug('Using search params: %s; ordering by: %s', search_string, order_by or 'default')
        try:
            page = task.requests.get(self.base_url + query)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            continue

        soup = get_soup(page.content)
        if soup.find('div', attrs={'class': 'tab-detail'}) is not None:
            for link in soup.find('div', attrs={'class': 'tab-detail'}).findAll('a', href=re.compile('^/torrent/')):
                li = link.parent.parent.parent

                title = str(link.text).replace('...', '')
                info_url = self.base_url + str(link.get('href'))[1:]
                seeds = int(li.find('span', class_='green').string)
                leeches = int(li.find('span', class_='red').string)
                size = str(li.find('div', class_='coll-4').string)

                size = parse_filesize(size)

                e = Entry()
                e['url'] = info_url
                e['title'] = title
                e['torrent_seeds'] = seeds
                e['torrent_leeches'] = leeches
                e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])
                e['content_size'] = size

                entries.add(e)

    return entries
def search(self, task, entry, config=None):
    """Search for name from torrentleech."""
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)

        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']

            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search_title(self, name, comparator=StringComparator(), url=None):
    """
    Search for name from piratebay.
    If optional search :url: is passed it will be used instead of internal search.
    """
    comparator.set_seq1(name)
    name = comparator.search_string()
    if not url:
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.org/search/' + urllib.quote(name.encode('utf-8'))
    log.debug('Using %s as piratebay search url' % url)
    page = urlopener(url, log)
    soup = get_soup(page)
    entries = []
    for link in soup.findAll('a', attrs={'class': 'detLink'}):
        comparator.set_seq2(link.contents[0])
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        entry = Entry()
        entry['title'] = link.contents[0]
        entry['url'] = 'http://thepiratebay.org' + link.get('href')
        tds = link.parent.parent.parent.findAll('td')
        entry['torrent_seeds'] = int(tds[-2].contents[0])
        entry['torrent_leeches'] = int(tds[-1].contents[0])
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        # Parse content_size
        size = link.findNext(attrs={'class': 'detDesc'}).contents[0]
        size = re.search(r'Size ([\.\d]+)\xa0([GMK])iB', size)
        if size:
            if size.group(2) == 'G':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            else:
                entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
        entries.append(entry)

    if not entries:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.search_title(name[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No close matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, task, entry, config=None):
    """Search for entries on SceneAccess"""
    session = task.requests
    if 'sceneaccess.eu' not in session.domain_limiters:
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare queries...
    base_urls = list()
    entries = set()
    for category in self.process_categories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                # convert tag text to ints so torrent_availability gets numbers
                entry['torrent_seeds'] = int(result.find('td', attrs={'class': 'ttr_seeders'}).text)
                entry['torrent_leeches'] = int(result.find('td', attrs={'class': 'ttr_leechers'}).text)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).text
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """Search for name from iptorrents"""
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)

    entries = set()

    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

        if '/u/' + str(config.get('uid')) not in req.content:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")

        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'class': 'torrents'})

        for torrent in torrents.findAll('a', href=re.compile(r'\.torrent$')):
            entry = Entry()
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=torrent['href'], key=config.get('rss_key'))
            entry['title'] = torrent.findPrevious("a", attrs={'class': 't_title'}).text

            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'K':
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, query, comparator, config):
    comparator.set_seq1(query)
    name = comparator.search_string().lower()
    search_string = name
    if config.get('verified'):
        search_string += ' verified:1'
    url = 'http://kat.ph/search/%s/?rss=1' % urllib.quote(search_string.encode('utf-8'))
    if config.get('category', 'all') != 'all':
        url += '&category=%s' % config['category']

    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)

    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # Check if item passes comparator
        comparator.set_seq2(item.title)
        log.debug('name: %s, found name: %s, confidence: %s' % (comparator.a, comparator.b, comparator.ratio()))
        if not comparator.matches():
            continue

        entry = Entry()
        entry['title'] = item.title

        if not item.get('enclosures'):
            log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
            continue
        entry['url'] = item.enclosures[0]['url']
        entry['search_ratio'] = comparator.ratio()
        entry['torrent_seeds'] = int(item.torrent_seeds)
        entry['torrent_leeches'] = int(item.torrent_peers)
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
        entry['torrent_info_hash'] = item.torrent_infohash

        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))

    return entries
def extract_entry_from_soup(self, soup):
    table = soup.find('div', {'id': 'main_table'}).find('table', {'class': 'table_info'})
    if len(table.find_all('tr')) == 1:
        log.debug('No search results were returned, continuing')
        return []

    entries = []
    for tr in table.find_all("tr"):
        # guard against rows without a class attribute before the membership test
        if not tr.get('class') or 'colhead_dark' in tr.get('class'):
            continue
        name = tr.find('div', {'class': 'main_title'}).find('a').text
        torrent_name = re.search('\\r\\n(.*)',
                                 tr.find('div', {'id': 'attachment_dl'}).find('a')['title']).group(1)
        attachment_link = tr.find('div', {'id': 'attachment_dl'}).find('a')['href']
        attachment_id = re.search(r'attachmentid=(\d+)', attachment_link).group(1)
        raw_size = tr.find_all('td', {'class': 'inline_info'})[0].text.strip()
        seeders = int(tr.find_all('td', {'class': 'inline_info'})[2].text)
        leechers = int(tr.find_all('td', {'class': 'inline_info'})[3].text)

        e = Entry()
        e['title'] = name
        final_url = 'https://www.fuzer.me/rss/torrent.php/{}/{}/{}/{}'.format(
            attachment_id, self.user_id, self.rss_key, torrent_name)

        log.debug('RSS-ified download link: %s' % final_url)
        e['url'] = final_url

        e['torrent_seeds'] = seeders
        e['torrent_leeches'] = leechers
        e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])

        size = re.search(r'(\d+.?\d+)([TGMK]?)B', raw_size)
        if size:
            if size.group(2) == 'T':
                e['content_size'] = int(float(size.group(1)) * 1000 ** 4 / 1024 ** 2)
            elif size.group(2) == 'G':
                e['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                e['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            elif size.group(2) == 'K':
                e['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            else:
                e['content_size'] = int(float(size.group(1)) / 1024 ** 2)

        entries.append(e)

    return entries
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(name.encode('utf-8'), safe=b':/~?=&%')
    log.debug('search url: %s' % url)

    html = requests.get(url).text
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                              search_sort=torrent_availability(seed, 0)))
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents
def search(self, task, entry, config=None):
    """Search for entries on PublicHD"""
    categories = config.get('category', 'all')
    # Ensure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # Convert named category to its respective category id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_url_fragment = '&category=%s' % urllib.quote(';'.join(str(c) for c in categories))

    base_url = 'http://publichd.se/index.php?page=torrents&active=0'

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query_url_fragment = '&search=' + urllib.quote(query.encode('utf8'))

        # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
        url = (base_url + category_url_fragment + query_url_fragment)
        log.debug('PublicHD search url: %s' % url)

        page = requests.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('a', href=re.compile('page=torrent-details')):
            entry = Entry()
            entry['title'] = result.text
            # Expand the selection to whole row
            result = result.findPrevious('tr')
            download_url = result.find('a', href=re.compile(r'\.torrent$'))['href']
            torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

            entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

            seeds, leeches = result.findAll('td', text=re.compile(r'^\d+$'))
            entry['torrent_seeds'] = int(seeds.text)
            entry['torrent_leeches'] = int(leeches.text)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = result.find("td", text=re.compile(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """Search for name from torrentday."""
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {'cata': 'yes', 'c%s' % ','.join(str(c) for c in categories): 1, 'clear-new': 1}
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):

        url = 'https://www.torrentday.com/browse.php'
        params['search'] = normalize_unicode(search_string).replace(':', '')
        cookies = {'uid': config['uid'], 'pass': config['passkey'], '__cfduid': config['cfduid']}

        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: %s' % e)

        soup = get_soup(page)

        for tr in soup.find_all('tr', {'class': 'browse'}):
            entry = Entry()
            # find the torrent names
            title = tr.find('a', {'class': 'torrentName'})
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'dlLinksInfo'})
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL; the passkey token was scrubbed from the
            # source, config['rss_key'] is the assumed value
            torrent_url = 'https://www.torrentday.com/' + torrent_url + '?torrent_pass=' + config['rss_key']
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders, leechers = tr.find_all('td', {'class': ['seedersInfo', 'leechersInfo']})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            # use tr object for size
            size = tr.find('td', text=re.compile(r'([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search(r'([\.\d]+) ([TGMKk]?)B', str(size))

            entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, query, comparator, config):
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    comparator.set_seq1(query)
    name = comparator.search_string()
    optionlist = ['misc', 'movies', 'audio', 'tv', 'games', 'apps', 'pics',
                  'anime', 'comics', 'books', 'music video', 'unclassified', 'all']
    url = 'http://isohunt.com/js/rss/%s?iht=%s&noSL' % (
        urllib.quote(name.encode('utf-8')), optionlist.index(config))

    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)
    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # assign confidence score of how close this link is to the name you're looking for. .6 and above is "close"
        comparator.set_seq2(item.title)
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue

        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link
        entry['search_ratio'] = comparator.ratio()

        m = re.search(r'Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)', item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        else:
            log.debug('regexp found size(%s), Seeds(%s) and Leeches(%s)' % (m.group(1), m.group(2), m.group(3)))
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2))
            entry['torrent_leeches'] = int(m.group(3))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % name, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, task, entry, config=None):
    """Search for entries on PublicHD"""
    categories = config.get('category', 'all')
    # Ensure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # Convert named category to its respective category id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_url_fragment = '&category=%s' % urllib.parse.quote(';'.join(str(c) for c in categories))

    base_url = 'http://publichd.se/index.php?page=torrents&active=0'

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query_url_fragment = '&search=' + urllib.parse.quote(query.encode('utf8'))

        # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
        url = (base_url + category_url_fragment + query_url_fragment)
        log.debug('PublicHD search url: %s' % url)

        page = requests.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('a', href=re.compile('page=torrent-details')):
            entry = Entry()
            entry['title'] = result.text
            # Expand the selection to whole row
            result = result.findPrevious('tr')
            download_url = result.find('a', href=re.compile(r'\.torrent$'))['href']
            torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

            entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

            seeds, leeches = result.findAll('td', text=re.compile(r'^\d+$'))
            entry['torrent_seeds'] = int(seeds.text)
            entry['torrent_leeches'] = int(leeches.text)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = result.find("td", text=re.compile(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string = clean_title(search_string)
        search_string_url_fragment = search_string
        params = {'rss': 1}
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'https://kat.cr/usearch/%s/' % quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            params['category'] = config['category']

        sorters = [{'field': 'time_add', 'sorder': 'desc'},
                   {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            params.update(sort)

            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url, params=params, raise_status=False)
            except RequestException as e:
                log.warning('Search resulted in: %s' % e)
                continue
            if not r.content:
                log.debug('No content returned from search.')
                continue
            elif r.status_code != 200:
                log.warning('Search returned %s response code' % r.status_code)
                continue

            rss = feedparser.parse(r.content)

            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        try:
            opened = urllib2.urlopen(url)
        except urllib2.URLError as err:
            url = 'http://torrentz.me/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
            log.warning('torrentz.eu failed, trying torrentz.me. Error: %s' % err)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                raise plugin.PluginWarning('Error requesting URL: %s' % err)
        rss = feedparser.parse(opened)

        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s %s' % (status, opened.msg))
        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string_url_fragment = search_string
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            url += '&category=%s' % config['category']

        sorters = [{'field': 'time_add', 'sorder': 'desc'},
                   {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            url += '&field=%(field)s&sorder=%(sorder)s' % sort

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status == 404:
                # Kat returns status code 404 when no results found for some reason...
                log.debug('No results found for search query: %s' % search_string)
                continue
            elif status not in [200, 301]:
                log.warning('Search result not 200 (OK), received %s' % status)
                continue

            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, task, entry, config=None):
    """Search for name from piratebay."""
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")

        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    log.error('Malformed search result? Title: "%s", No size? %s', entry['title'], size_text)

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, arg_entry, config=None):
    """Search for name from piratebay."""
    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)

    entries = set()
    # the list of queries lives under 'search_strings' (plural), as in the
    # other variants of this plugin
    for search_string in arg_entry.get('search_strings', [arg_entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes
        query = query.replace('-', ' ')
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.%s/search/%s%s' % (CUR_TLD, urllib.quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = link.contents[0]
            entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD, link.get('href'))
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
            size = re.search(r'Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config): """ Search for entries on 1337x """ if not isinstance(config, dict): config = {} order_by = '' sort_order = '' if isinstance(config.get('order_by'), str): if config['order_by'] != 'leechers': order_by = '/{0}/desc'.format(config['order_by']) sort_order = 'sort-' entries = set() for search_string in entry.get('search_strings', [entry['title']]): query = '{0}search/{1}{2}/1/'.format(sort_order, quote(search_string.encode('utf8')), order_by) log.debug('Using search params: %s; ordering by: %s', search_string, order_by or 'default') try: page = task.requests.get(self.base_url + query) log.debug('requesting: %s', page.url) except RequestException as e: log.error('1337x request failed: %s', e) continue soup = get_soup(page.content) if soup.find('div', attrs={'class': 'tab-detail'}) is not None: for link in soup.find('div', attrs={'class': 'tab-detail'}).findAll('a', href=re.compile('^/torrent/')): li = link.parent.parent.parent title = str(link.text).replace('...', '') info_url = self.base_url + str(link.get('href'))[1:] seeds = int(li.find('span', class_='green').string) leeches = int(li.find('span', class_='red').string) size = str(li.find('div', class_='coll-4').string) size = parse_filesize(size) e = Entry() e['url'] = info_url e['title'] = title e['torrent_seeds'] = seeds e['torrent_leeches'] = leeches e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches']) e['content_size'] = size entries.add(e) return entries
def search(self, query, comparator=StringComparator(), config=None):
    if config:
        feed = REPUTATIONS[config]
    else:
        feed = REPUTATIONS['good']
    comparator.set_seq1(query)
    query = comparator.search_string()
    # urllib.quote will crash if the unicode string has non ascii characters,
    # so encode in utf-8 beforehand
    url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # assign confidence score of how close this link is to the name you're
        # looking for. .6 and above is "close"
        comparator.set_seq2(item.title)
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue

        m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                      item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue

        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link
        entry['content_size'] = int(m.group(1))
        entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
        entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
        entry['torrent_info_hash'] = m.group(4).upper()
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % query, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    log.debug('Search got %d results' % len(entries))
    return entries
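# Aside (illustration, not plugin code): every torrentz variant in this
# collection depends on the feed item's description following the exact
# 'Size: ... Mb Seeds: ... Peers: ... Hash: ...' layout matched below. The
# sample description is made up; the regular expression is the one the
# plugins use.
import re

sample = 'Size: 1419 Mb Seeds: 1,024 Peers: 512 Hash: 0123456789abcdef0123456789abcdef01234567'
m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
              sample, re.IGNORECASE)
if m:
    # commas are stripped before the int() conversion, exactly as in the plugins
    print(int(m.group(1)), int(m.group(2).replace(',', '')),
          int(m.group(3).replace(',', '')), m.group(4).upper())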
def search(self, entry, config):
    # urllib.quote will crash if the unicode string has non ascii characters,
    # so encode in utf-8 beforehand
    name = normalize_unicode(entry['title'])
    optionlist = ['misc', 'movies', 'audio', 'tv', 'games', 'apps', 'pics',
                  'anime', 'comics', 'books', 'music video', 'unclassified', 'all']
    url = 'http://isohunt.com/js/rss/%s?iht=%s&noSL' % (
        urllib.quote(name.encode('utf-8')), optionlist.index(config))

    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link

        m = re.search(r'Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)',
                      item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        else:
            log.debug('regexp found size(%s), Seeds(%s) and Leeches(%s)' %
                      (m.group(1), m.group(2), m.group(3)))
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2))
            entry['torrent_leeches'] = int(m.group(3))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, task, entry, config=None):
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
            # seeders/leechers are the last two cells; cast to int so that
            # torrent_availability operates on numbers rather than strings
            entry['torrent_seeds'], entry['torrent_leeches'] = [int(r.text) for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)
    return entries
def search(self, entry, config):
    api_key = config

    searches = entry.get('search_strings', [entry['title']])

    if 'series_name' in entry:
        search = {'series': entry['series_name']}
        if 'series_id' in entry:
            # BTN wants an ep style identifier even for sequence shows
            if entry.get('series_id_type') == 'sequence':
                search['name'] = 'S01E%02d' % entry['series_id']
            else:
                search['name'] = entry['series_id']
        searches = [search]

    results = set()
    for search in searches:
        data = json.dumps({'method': 'getTorrents', 'params': [api_key, search], 'id': 1})
        try:
            r = session.post('http://api.btnapps.net/', data=data,
                             headers={'Content-type': 'application/json'})
        except requests.RequestException as e:
            log.error('Error searching btn: %s' % e)
            continue
        content = r.json()
        if not content or not content['result']:
            log.debug('No results from btn')
            continue
        if 'torrents' in content['result']:
            for item in content['result']['torrents'].itervalues():
                if item['Category'] != 'Episode':
                    continue
                entry = Entry()
                entry['title'] = item['ReleaseName']
                entry['title'] += ' '.join(['', item['Resolution'], item['Source'], item['Codec']])
                entry['url'] = item['DownloadURL']
                entry['torrent_seeds'] = int(item['Seeders'])
                entry['torrent_leeches'] = int(item['Leechers'])
                entry['torrent_info_hash'] = item['InfoHash']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                if item['TvdbID']:
                    entry['tvdb_id'] = int(item['TvdbID'])
                results.add(entry)
    return results
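# Aside (illustration, not plugin code): the BTN lookups above are plain
# JSON-RPC over HTTP POST. A request body built exactly like the plugin's
# json.dumps(...) call looks as follows; the API key and series fields are
# placeholder values.
import json

payload = json.dumps({
    'method': 'getTorrents',
    'params': ['YOUR-API-KEY', {'series': 'Some Show', 'name': 'S01E02'}],
    'id': 1,
})
print(payload)
# {"method": "getTorrents", "params": ["YOUR-API-KEY", {"series": "Some Show", "name": "S01E02"}], "id": 1}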
def entries_from_search(self, name, url=None, comparator=StringComparator(cutoff=0.9)):
    """Parses torrent download url from search results"""
    comparator.set_seq1(name)
    name = comparator.search_string()
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name, safe=':/~?=&%')

    log.debug('search url: %s' % url)

    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.findAll('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.findNext('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        if comparator.matches(release_name):
            torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                                  search_ratio=comparator.ratio(), search_sort=torrent_availability(seed, 0)))
        else:
            log.debug('rejecting search result: %s !~ %s' % (release_name, name))
    # sort by seed count in descending order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No matches for %s' % name, log, log_once=True)
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents
def search(self, query, comparator, config):
    comparator.set_seq1(query)
    name = comparator.search_string().lower()
    url = 'http://www.kat.ph/search/%s/?rss=1' % urllib.quote(name.encode('utf-8'))
    if config != 'all':
        url += '&category=%s' % config
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # Check if item passes comparator
        comparator.set_seq2(item.title)
        log.debug('name: %s, found name: %s, confidence: %s' %
                  (comparator.a, comparator.b, comparator.ratio()))
        if not comparator.matches():
            continue

        entry = Entry()
        entry['title'] = item.title
        if item.torrentlink.startswith('//'):
            entry['url'] = 'http:' + item.torrentlink
        else:
            entry['url'] = item.torrentlink
        entry['search_ratio'] = comparator.ratio()
        entry['torrent_seeds'] = int(item.seeds)
        entry['torrent_leeches'] = int(item.leechs)
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entry['content_size'] = int(item.size) / 1024 / 1024
        entry['torrent_info_hash'] = item.hash
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, query, comparator, config):
    comparator.set_seq1(query)
    name = comparator.search_string().lower()
    search_string = name
    if config.get('verified'):
        search_string += ' verified:1'
    url = 'http://kat.ph/search/%s/?rss=1' % urllib.quote(search_string.encode('utf-8'))
    if config.get('category', 'all') != 'all':
        url += '&category=%s' % config['category']
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    entries = []

    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)

    ex = rss.get('bozo_exception', False)
    if ex:
        raise PluginWarning('Got bozo_exception (bad feed)')

    for item in rss.entries:
        # Check if item passes comparator
        comparator.set_seq2(item.title)
        log.debug('name: %s, found name: %s, confidence: %s' %
                  (comparator.a, comparator.b, comparator.ratio()))
        if not comparator.matches():
            continue

        entry = Entry()
        entry['title'] = item.title
        if not item.get('enclosures'):
            log.warning('Could not get url for entry from KAT. Maybe plugin needs updating?')
            continue
        entry['url'] = item.enclosures[0]['url']
        entry['search_ratio'] = comparator.ratio()
        entry['torrent_seeds'] = int(item.seeds)
        entry['torrent_leeches'] = int(item.leechs)
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entry['content_size'] = int(item.size) / 1024 / 1024
        entry['torrent_info_hash'] = item.hash
        entries.append(entry)

    # choose torrent
    if not entries:
        raise PluginWarning('No matches for %s' % name, log, log_once=True)

    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def search(self, task, entry, config=None):
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
            # seeders/leechers are the last two cells; cast to int so that
            # torrent_availability operates on numbers rather than strings
            entry['torrent_seeds'], entry['torrent_leeches'] = [int(r.text) for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)
    return entries
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me', 'ch', 'in']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try
                # more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError('No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)

        rss = feedparser.parse(r.content)

        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def extract_entry_from_soup(self, soup):
    table = soup.find('div', {'id': 'main_table'})
    if table is None:
        raise PluginError('Could not fetch results table from Fuzer, aborting')
    log.trace('fuzer results table: %s', table)
    table = table.find('table', {'class': 'table_info'})
    if len(table.find_all('tr')) == 1:
        log.debug('No search results were returned from Fuzer, continuing')
        return []

    entries = []
    for tr in table.find_all("tr"):
        if not tr.get('class') or 'colhead_dark' in tr.get('class'):
            continue
        name = tr.find('div', {'class': 'main_title'}).find('a').text
        torrent_name = re.search('\\n(.*)',
                                 tr.find('div', {'style': 'float: right;'}).find('a')['title']).group(1)
        attachment_link = tr.find('div', {'style': 'float: right;'}).find('a')['href']
        attachment_id = re.search('attachmentid=(\d+)', attachment_link).group(1)
        raw_size = tr.find_all('td', {'class': 'inline_info'})[0].text.strip()
        seeders = int(tr.find_all('td', {'class': 'inline_info'})[2].text)
        leechers = int(tr.find_all('td', {'class': 'inline_info'})[3].text)

        e = Entry()
        e['title'] = name
        final_url = 'https://www.fuzer.me/rss/torrent.php/{}/{}/{}/{}'.format(
            attachment_id, self.user_id, self.rss_key, torrent_name)
        log.debug('RSS-ified download link: %s', final_url)
        e['url'] = final_url

        e['torrent_seeds'] = seeders
        e['torrent_leeches'] = leechers
        e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])

        size = re.search('(\d+(?:[.,]\d+)*)\s?([KMGTP]B)', raw_size)
        e['content_size'] = parse_filesize(size.group(0))

        entries.append(e)
    return entries
def search(self, task, entry, config=None): """ Search for name from piratebay. """ if not isinstance(config, dict): config = {} sort = SORT.get(config.get("sort_by", "seeds")) if config.get("sort_reverse"): sort += 1 if isinstance(config.get("category"), int): category = config["category"] else: category = CATEGORIES.get(config.get("category", "all")) filter_url = "/0/%d/%d" % (sort, category) entries = set() for search_string in entry.get("search_strings", [entry["title"]]): query = normalize_unicode(search_string) # TPB search doesn't like dashes query = query.replace("-", " ") # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand url = "http://thepiratebay.%s/search/%s%s" % (CUR_TLD, quote(query.encode("utf-8")), filter_url) log.debug("Using %s as piratebay search url" % url) page = requests.get(url).content soup = get_soup(page) for link in soup.find_all("a", attrs={"class": "detLink"}): entry = Entry() entry["title"] = self.extract_title(link) if not entry["title"]: log.error("Malformed search result. No title or url found. Skipping.") continue entry["url"] = "http://thepiratebay.%s%s" % (CUR_TLD, link.get("href")) tds = link.parent.parent.parent.find_all("td") entry["torrent_seeds"] = int(tds[-2].contents[0]) entry["torrent_leeches"] = int(tds[-1].contents[0]) entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"]) # Parse content_size size = link.find_next(attrs={"class": "detDesc"}).contents[0] size = re.search("Size ([\.\d]+)\xa0([GMK])iB", size) if size: if size.group(2) == "G": entry["content_size"] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2) elif size.group(2) == "M": entry["content_size"] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2) else: entry["content_size"] = int(float(size.group(1)) * 1000 / 1024 ** 2) entries.add(entry) return sorted(entries, reverse=True, key=lambda x: x.get("search_sort"))
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'me']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, urllib.quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = requests.get(url)
                break
            except requests.RequestException as err:
                log.warning('torrentz.%s failed. Error: %s' % (domain, err))
        else:
            raise plugin.PluginWarning('Error getting torrentz search results')

        rss = feedparser.parse(r.content)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        try:
            opened = urllib2.urlopen(url)
        except urllib2.URLError as err:
            url = 'http://torrentz.me/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
            log.warning('torrentz.eu failed, trying torrentz.me. Error: %s' % err)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                raise plugin.PluginWarning('Error requesting URL: %s' % err)

        rss = feedparser.parse(opened)

        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s %s' % (status, opened.msg))

        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.add(entry)

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config=None): config = self.process_config(config) feed = REPUTATIONS[config["reputation"]] entries = set() for search_string in entry.get("search_strings", [entry["title"]]): query = normalize_unicode(search_string + config.get("extra_terms", "")) for domain in ["eu", "me"]: # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand url = "http://torrentz.%s/%s?q=%s" % (domain, feed, urllib.quote(query.encode("utf-8"))) log.debug("requesting: %s" % url) try: r = requests.get(url, headers={"User-Agent": "FlexGet/%s" % flexget.__version__}) break except requests.RequestException as err: log.warning("torrentz.%s failed. Error: %s" % (domain, err)) else: raise plugin.PluginError("Error getting torrentz search results") if not r.content.strip(): raise plugin.PluginError("No data from %s. Maybe torrentz is blocking the FlexGet User-Agent" % url) rss = feedparser.parse(r.content) if rss.get("bozo_exception"): raise plugin.PluginError("Got bozo_exception (bad rss feed)") for item in rss.entries: m = re.search( r"Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)", item.description, re.IGNORECASE, ) if not m: log.debug("regexp did not find seeds / peer data") continue entry = Entry() entry["title"] = item.title entry["url"] = item.link entry["content_size"] = int(m.group(1)) entry["torrent_seeds"] = int(m.group(2).replace(",", "")) entry["torrent_leeches"] = int(m.group(3).replace(",", "")) entry["torrent_info_hash"] = m.group(4).upper() entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"]) entries.add(entry) log.debug("Search got %d results" % len(entries)) return entries
def entries_from_search(self, name, url=None): """Parses torrent download url from search results""" name = normalize_unicode(name) if not url: url = 'http://www.newtorrents.info/search/%s' % quote(name.encode('utf-8'), safe=b':/~?=&%') log.debug('search url: %s' % url) html = requests.get(url).text # fix </SCR'+'IPT> so that BS does not crash # TODO: should use beautifulsoup massage html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html) soup = get_soup(html) # saving torrents in dict torrents = [] for link in soup.find_all('a', attrs={'href': re.compile('down.php')}): torrent_url = 'http://www.newtorrents.info%s' % link.get('href') release_name = link.parent.next.get('title') # quick dirty hack seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents() if seed == 'n/a': seed = 0 else: try: seed = int(seed) except ValueError: log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed) seed = 0 # TODO: also parse content_size and peers from results torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed, search_sort=torrent_availability(seed, 0))) # sort with seed number Reverse order torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0)) # choose the torrent if not torrents: dashindex = name.rfind('-') if dashindex != -1: return self.entries_from_search(name[:dashindex]) else: return torrents else: if len(torrents) == 1: log.debug('found only one matching search result.') else: log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents) return torrents
def search(self, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        if config.get('verified'):
            search_string += ' verified:1'
        url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            url += '&category=%s' % config['category']

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            raise plugin.PluginWarning('Search result not 200 (OK), received %s' % status)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise plugin.PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title

            if not item.get('enclosures'):
                log.warning('Could not get url for entry from KAT. Maybe plugin needs updating?')
                continue
            entry['url'] = item.enclosures[0]['url']
            entry['torrent_seeds'] = int(item.torrent_seeds)
            entry['torrent_leeches'] = int(item.torrent_peers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
            entry['torrent_info_hash'] = item.torrent_infohash

            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    entries = set()
    search_strings = [normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])]
    for search_string in search_strings:
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
            urllib.quote(search_string.encode('utf-8')))

        log.debug('requesting: %s' % url)

        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
        if data['status'] != 'ok':
            raise plugin.PluginError('failed to query YTS')

        try:
            if data['data']['movie_count'] > 0:
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        entry['content_size'] = parse_filesize(str(torrent['size_bytes']) + "b")
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                                    entry['torrent_leeches'])
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')

    log.debug('Search got %d results' % len(entries))
    return entries
def search(self, entry, config):
    api_key = config

    searches = entry.get('search_strings', [entry['title']])

    if 'series_name' in entry:
        search = {'series': entry['series_name']}
        if 'series_id' in entry:
            search['name'] = entry['series_id']
        searches = [search]

    results = []
    for search in searches:
        data = json.dumps({'method': 'getTorrents', 'params': [api_key, search], 'id': 1})
        try:
            r = session.post('http://api.btnapps.net/', data=data,
                             headers={'Content-type': 'application/json'})
        except requests.RequestException as e:
            log.error('Error searching btn: %s' % e)
            continue
        content = r.json()
        if content['result']['results']:
            for item in content['result']['torrents'].itervalues():
                if item['Category'] != 'Episode':
                    continue
                entry = Entry()
                entry['title'] = item['ReleaseName']
                entry['url'] = item['DownloadURL']
                entry['torrent_seeds'] = int(item['Seeders'])
                entry['torrent_leeches'] = int(item['Leechers'])
                entry['torrent_info_hash'] = item['InfoHash']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                if item['TvdbID']:
                    entry['tvdb_id'] = int(item['TvdbID'])
                results.append(entry)
    return results
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')), CATEGORIES[config['category']], FILTERS.index(config['filter']))

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
        if status >= 400:
            continue

        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)

            entries.add(entry)

    return entries
def search(self, entry, config):
    search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
    entries = set()
    for search_string in search_strings:
        search_string_url_fragment = search_string
        if config.get('verified'):
            search_string_url_fragment += ' verified:1'
        url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string_url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            url += '&category=%s' % config['category']

        # first pass sorts by newest; a second pass sorted by seeders only runs
        # when the first page looks full (25 items)
        sorters = [{'field': 'time_add', 'sorder': 'desc'},
                   {'field': 'seeders', 'sorder': 'desc'}]
        for sort in sorters:
            url += '&field=%(field)s&sorder=%(sorder)s' % sort

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status == 404:
                # Kat returns status code 404 when no results found for some reason...
                log.debug('No results found for search query: %s' % search_string)
                continue
            elif status != 200:
                raise plugin.PluginWarning('Search result not 200 (OK), received %s' % status)

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updating?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, task, entry, config=None): """ Search for name from torrent411. """ url_base = 'http://www.t411.in' if not isinstance(config, dict): config = {} category = config.get('category') if category in list(CATEGORIES): category = CATEGORIES[category] sub_categories = config.get('sub_category') if not isinstance(sub_categories, list): sub_categories = [sub_categories] filter_url = '' if isinstance(category, int): filter_url = '&cat=%s' % str(category) if sub_categories[0] is not None: sub_categories = [SUB_CATEGORIES[c] for c in sub_categories] filter_url = filter_url + '&' + '&'.join([ urllib.quote_plus('term[%s][]' % c[0]).encode('utf-8') + '=' + str(c[1]) for c in sub_categories ]) entries = set() for search_string in entry.get('search_strings', [entry['title']]): query = normalize_unicode(search_string) url_search = ('/torrents/search/?search=%40name+' + urllib.quote_plus(query.encode('utf-8')) + filter_url) opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] response = opener.open(url_base + url_search) data = response.read() soup = get_soup(data) tb = soup.find("table", class_="results") if not tb: continue for tr in tb.findAll('tr')[1:][:-1]: entry = Entry() nfo_link_res = re.search('torrents/nfo/\?id=(\d+)', str(tr)) if nfo_link_res is not None: tid = nfo_link_res.group(1) title_res = re.search( '<a href=\"//www.t411.in/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)\" title="([^"]*)">', str(tr)) if title_res is not None: entry['title'] = title_res.group(2).decode('utf-8') size = tr('td')[5].contents[0] entry[ 'url'] = 'http://www.t411.in/torrents/download/?id=%s' % tid entry['torrent_seeds'] = tr('td')[7].contents[0] entry['torrent_leeches'] = tr('td')[8].contents[0] entry['search_sort'] = torrent_availability( entry['torrent_seeds'], entry['torrent_leeches']) size = re.search('([\.\d]+) ([GMK]?)B', size) if size: if size.group(2) == 'G': entry['content_size'] = int( float(size.group(1)) * 1000**3 / 1024**2) elif size.group(2) == 'M': entry['content_size'] = int( float(size.group(1)) * 1000**2 / 1024**2) elif size.group(2) == 'K': entry['content_size'] = int( float(size.group(1)) * 1000 / 1024**2) else: entry['content_size'] = int( float(size.group(1)) / 1024**2) auth_handler = t411Auth(config['username'], config['password']) entry['download_auth'] = auth_handler entries.add(entry) return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None): """ Search for entries on SceneAccess """ session = task.requests if 'sceneaccess.eu' not in session.domain_limiters: session.add_domain_limiter( TimedLimiter('sceneaccess.eu', '7 seconds')) if not session.cookies: log.debug('Logging in to %s...' % URL) params = { 'username': config['username'], 'password': config['password'], 'submit': 'come on in' } session.post(URL + 'login', data=params) if 'gravity_multiplier' in config: multip = config['gravity_multiplier'] else: multip = 1 # Prepare queries... BASE_URLS = list() entries = set() for category in self.processCategories(config): BASE_URLS.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category) # Search... for search_string in entry.get('search_strings', [entry['title']]): search_string_normalized = normalize_unicode( clean_title(search_string)) search_string_url_fragment = '&search=' + quote( search_string_normalized.encode('utf8')) for url in BASE_URLS: url += search_string_url_fragment log.debug('Search URL for `%s`: %s' % (search_string, url)) page = session.get(url).content soup = get_soup(page) for result in soup.findAll('tr', attrs={'class': 'tt_row'}): entry = Entry() entry['title'] = result.find( 'a', href=re.compile(r'details\?id=\d+'))['title'] entry['url'] = URL + result.find( 'a', href=re.compile(r'.torrent$'))['href'] entry['torrent_seeds'] = result.find('td', attrs={ 'class': 'ttr_seeders' }).text entry['torrent_leeches'] = result.find('td', attrs={ 'class': 'ttr_leechers' }).text entry['search_sort'] = torrent_availability( entry['torrent_seeds'], entry['torrent_leeches']) * multip size = result.find('td', attrs={'class': 'ttr_size'}).text size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size) if size: if size.group(2) == 'GB': entry['content_size'] = int( float(size.group(1)) * 1000**3 / 1024**2) elif size.group(2) == 'MB': entry['content_size'] = int( float(size.group(1)) * 1000**2 / 1024**2) elif size.group(2) == 'KB': entry['content_size'] = int( float(size.group(1)) * 1000 / 1024**2) else: entry['content_size'] = int( float(size.group(1)) / 1024**2) entries.add(entry) return entries
def search(self, task, entry, config=None): """ Search for name from torrentleech. """ rss_key = config['rss_key'] # build the form request: data = { 'username': config['username'], 'password': config['password'], 'remember_me': 'on', 'submit': 'submit' } # POST the login form: login = requests.post('https://torrentleech.org/', data=data) if not isinstance(config, dict): config = {} # sort = SORT.get(config.get('sort_by', 'seeds')) # if config.get('sort_reverse'): # sort += 1 categories = config.get('category', 'all') # Make sure categories is a list if not isinstance(categories, list): categories = [categories] # If there are any text categories, turn them into their id number categories = [ c if isinstance(c, int) else CATEGORIES[c] for c in categories ] filter_url = '/categories/%s' % ','.join(str(c) for c in categories) entries = set() for search_string in entry.get('search_strings', [entry['title']]): query = normalize_unicode(search_string).replace(":", "") # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand url = ('http://torrentleech.org/torrents/browse/index/query/' + quote(query.encode('utf-8')) + filter_url) log.debug('Using %s as torrentleech search url' % url) page = requests.get(url, cookies=login.cookies).content soup = get_soup(page) for tr in soup.find_all("tr", ["even", "odd"]): # within each even or odd row, find the torrent names link = tr.find("a", attrs={'href': re.compile('/torrent/\d+')}) log.debug('link phase: %s' % link.contents[0]) entry = Entry() # extracts the contents of the <a>titlename/<a> tag entry['title'] = link.contents[0] # find download link torrent_url = tr.find("a", attrs={ 'href': re.compile('/download/\d+/.*') }).get('href') # parse link and split along /download/12345 and /name.torrent download_url = re.search('(/download/\d+)/(.+\.torrent)', torrent_url) # change link to rss and splice in rss_key torrent_url = 'http://torrentleech.org/rss' + download_url.group(1) + '/' \ + rss_key + '/' + download_url.group(2) log.debug('RSS-ified download link: %s' % torrent_url) entry['url'] = torrent_url # us tr object for seeders/leechers seeders, leechers = tr.find_all('td', ["seeders", "leechers"]) entry['torrent_seeds'] = int(seeders.contents[0]) entry['torrent_leeches'] = int(leechers.contents[0]) entry['search_sort'] = torrent_availability( entry['torrent_seeds'], entry['torrent_leeches']) # use tr object for size size = tr.find( "td", text=re.compile('([\.\d]+) ([TGMK]?)B')).contents[0] size = re.search('([\.\d]+) ([TGMK]?)B', size) if size: if size.group(2) == 'T': entry['content_size'] = int( float(size.group(1)) * 1000**4 / 1024**2) elif size.group(2) == 'G': entry['content_size'] = int( float(size.group(1)) * 1000**3 / 1024**2) elif size.group(2) == 'M': entry['content_size'] = int( float(size.group(1)) * 1000**2 / 1024**2) elif size.group(2) == 'K': entry['content_size'] = int( float(size.group(1)) * 1000 / 1024**2) else: entry['content_size'] = int( float(size.group(1)) / 1024**2) entries.add(entry) return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))