def create(product_id, props, sale, brand):
    product = None
    product_key_name = Product.calc_key_name(product_id)
    if Product.get_by_key_name(product_key_name) is None:
        product = Product(key_name=product_key_name,
                          id=product_id,
                          name=props['name'],
                          sale=sale,
                          ship_min=parse(props.get('min_ship_date')),
                          ship_max=parse(props.get('max_ship_date')),
                          brand=brand,
                          description=helper.remove_html_tags(props.get('description')),
                          status=props.get('status'),
                          fit=props.get('fit'),
                          categories=props['categories'],
                          origin=props.get('origin'),
                          return_policy_id=props.get('return_policy_id'))

        # materials are comma separated; fall back to the raw value if
        # transliteration or str() conversion fails
        material = props.get('material')
        if material:
            try:
                product.materials = [str(m).strip().lower() for m in unidecode(material).split(',')]
            except Exception:
                product.materials = material

        # colours are '/' separated, e.g. "Black/White"
        color = props.get('color')
        if color:
            product.colors = [str(c).strip().lower() for c in unidecode(color).split('/')]

        product.put()
    return product
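# A minimal usage sketch for create(); the props dict, 'SKU-123', current_sale and
# current_brand below are hypothetical placeholders, not values from this codebase.
props = {
    'name': 'Linen Shirt',
    'categories': ['shirts'],
    'min_ship_date': '2014-06-01',
    'max_ship_date': '2014-06-07',
    'description': '<p>Classic fit linen shirt</p>',
    'status': 'active',
    'fit': 'classic',
    'material': 'Linen, Cotton',
    'color': 'Blue/White',
    'origin': 'PT',
    'return_policy_id': 1,
}
product = create('SKU-123', props, sale=current_sale, brand=current_brand)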
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
        'info': 'view', 'get': 'download', 'title': 'view\s+torrent\s+'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

            # fetch 15 results by default, and up to 100 if allowed in user profile
            search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'filter_cat[%s]=1'))

            html = self.get_url(search_url)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find('table', attrs={'class': 'torrent_table'})
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers, size = [tryInt(n, n) for n in [
                                tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -4)]]
                            if self._peers_fail(mode, seeders, leechers):
                                continue

                            info = tr.find('a', title=rc['info'])
                            title = 'title' in info.attrs and rc['title'].sub('', info.attrs['title']) \
                                or info.get_text().strip()

                            link = str(tr.find('a', title=rc['get'])['href']).replace('&amp;', '&').lstrip('/')
                            download_url = self.urls['get'] % link
                        except (AttributeError, TypeError, ValueError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
            self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

    return results
def _doSearch(self, search_params, epcount=0, age=0):

    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if mode != 'RSS':
                searchURL = self.proxy._buildURL(self.searchurl % (urllib.quote(unidecode(search_string))))
            else:
                searchURL = self.proxy._buildURL(self.url + 'tv/latest/')

            logger.log(u"Search string: " + searchURL, logger.DEBUG)

            data = self.getURL(searchURL)
            if not data:
                continue

            re_title_url = self.proxy._buildRE(self.re_title_url)

            # Extract torrent information from the data returned by searchURL
            match = re.compile(re_title_url, re.DOTALL).finditer(urllib.unquote(data))

            for torrent in match:

                # unknown why, but SickBeard skips releases with '_' in the name
                title = torrent.group('title').replace('_', '.')
                url = torrent.group('url')
                id = int(torrent.group('id'))
                seeders = int(torrent.group('seeders'))
                leechers = int(torrent.group('leechers'))

                # Filter unseeded torrents
                if mode != 'RSS' and seeders == 0:
                    continue

                # Only accept torrents from trusted uploaders (VIP/Trusted/Helper) for episode searches
                if self.confirmed and re.search('(VIP|Trusted|Helper)', torrent.group(0)) is None:
                    logger.log(u"ThePirateBay Provider found result " + torrent.group('title')
                               + " but that doesn't seem like a trusted result so I'm ignoring it", logger.DEBUG)
                    continue

                # Check the number of video files (= episodes in season) and find the real quality
                # of a full-season torrent by analysing the files it contains
                if mode == 'Season':
                    ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
                    title = self._find_season_quality(title, id, ep_number)

                if not title or not url:
                    continue

                item = title, url, id, seeders, leechers

                items[mode].append(item)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)

        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs):

    self._authorised()

    results = []
    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    url = self.urls['browse'] % self.passkey
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

            search_url = url + (self.urls['search'] % search_string, '')['Cache' == mode]

            xml_data = self.cache.get_rss(search_url)

            cnt = len(items[mode])
            if xml_data and 'entries' in xml_data:
                for entry in xml_data['entries']:
                    try:
                        if entry['title'] and 'download' in entry['link']:
                            items[mode].append((entry['title'], entry['link'], None, None))
                    except KeyError:
                        continue

            self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = list(set(results + items[mode]))

    return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

            search_url = self.urls['search'] % search_string

            data_json = self.get_url(search_url, headers=dict(Authorization='Bearer %s' % self._token), json=True)
            if self.should_skip():
                return results

            cnt = len(items[mode])
            if data_json:
                for tr in data_json.get('releases'):
                    seeders, leechers, size = (tryInt(n, n) for n in [
                        tr.get(x) for x in ('seeders', 'leechers', 'size')])
                    if not self._reject_item(seeders, leechers):
                        title, download_url = tr.get('releaseName'), self._link(tr.get('shortId'))
                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

    return results
def _search_provider(self, search_params, **kwargs): results = [] if self.show and not self.show.is_anime: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': r'[\.\s]+'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] if 'Cache' == mode else \ self.urls['search'] % (rc['nodots'].sub(' ', search_string), str(time.time()).replace('.', '3')) data, html = 2 * [None] if 'Cache' == mode: data = self.cache.get_rss(search_url) else: html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if None is not data: for cur_item in data.get('entries', []): title, download_url = cur_item.get('title'), self._link(cur_item.get('link')) if title and download_url: items[mode].append((title, download_url, '', '')) if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser('<html><body>%s</body></html>' % html, features=['html5lib', 'permissive']) as soup: for link in soup.find_all('a'): try: variants = map(lambda t: t.get_text().replace('SD', '480p'), link.find_all('span', class_='badge')) map(lambda t: t.decompose(), link.find_all('span') + link.find_all('div')) title = '[HorribleSubs] ' + re.sub(r'\s*\[HorribleSubs\]\s*', '', link.get_text()) download_url = self._link(link.get('href')) if title and download_url: items[mode] += map(lambda v: ( '%s [%s]' % (title, v), '%s-%s' % (download_url, v), '', ''), variants) except (AttributeError, TypeError, ValueError): continue except generic.HaltParseException: pass except (StandardError, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

            # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
            search_url = self.urls['search'] % (self._categories_string(mode, '%s', ';'), search_string,
                                                ('', ';free')[self.freeleech], (';o=seeders', '')['Cache' == mode])

            html = self.get_url(search_url)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find('table', attrs={'class': 'torrents'})
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers = [int(tr.find('td', attrs={'class': x}).get_text().strip())
                                                 for x in ('t_seeders', 't_leechers')]
                            if self._peers_fail(mode, seeders, leechers):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = ('title' in info.attrs and info['title']) or info.get_text().strip()
                            size = tr.find_all('td')[-4].get_text().strip()

                            download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                        except (AttributeError, TypeError, ValueError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
            self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download', 'fl': '\[\W*F\W?L\W*\]' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string()) html = self.get_url(search_url, timeout=90) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive'], attr='border="1"') as soup: torrent_table = soup.find('table', attrs={'border': '1'}) torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows[1:]: try: info = tr.find('a', href=rc['info']) seeders, leechers, size = [tryInt(n, n) for n in [ tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -4)]] if self.freeleech and (len(info.contents) < 2 or not rc['fl'].search(info.contents[1].string.strip())) \ or self._peers_fail(mode, seeders, leechers): continue title = 'title' in info.attrs and info.attrs['title'] or info.contents[0] title = (isinstance(title, list) and title[0] or title).strip() download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/') except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except Exception: logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) self._sort_seeders(mode, items) results = list(set(results + items[mode])) return results
def html(self, mode, search_string, results): if 'Content-Type' in self.session.headers: del (self.session.headers['Content-Type']) setattr(self.session, 'reserved', {'headers': { 'Accept': 'text/html, application/xhtml+xml, */*', 'Accept-Language': 'en-GB', 'Cache-Control': 'no-cache', 'Referer': 'https://broadcasthe.net/login.php', 'User-Agent': self.ua}}) self.headers = None if self.auth_html or self._authorised_html(): del (self.session.reserved['headers']['Referer']) if 'Referer' in self.session.headers: del (self.session.headers['Referer']) self.auth_html = True search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'filter_cat[%s]=1')) html = self.get_url(search_url, use_tmr_limit=False) if self.should_skip(log_warning=False, use_tmr_limit=False): return results cnt = len(results) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find(id='torrent_table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'cats': '(?i)cat\[(?:%s)\]' % self._categories_string(mode, template='', delimiter='|'), 'get': 'download'}.items()) head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if not tr.find('a', href=rc['cats']) or self._reject_item( seeders, leechers, container=self.reject_m2ts and ( re.search(r'(?i)\[.*?m2?ts.*?\]', tr.get_text('', strip=True)))): continue title = tr.select('td span[title]')[0].attrs.get('title').strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): continue if title and download_url: results.append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} for mode in search_params.keys(): rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': '.*id=(\d+).*', 'fl': '\[freeleech\]', 'cats': 'cat=(?:%s)' % self._categories_string(mode=mode, template='', delimiter='|')}.items()) for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (self._categories_string(), '+'.join(search_string.replace('.', ' ').split()), ('', '&freeleech=on')[self.freeleech]) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', attrs={'cellpadding': 5}) torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if None is tr.find('a', href=rc['cats']) or self._reject_item( seeders, leechers, self.freeleech and (None is rc['fl'].search(cells[1].get_text()))): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link('%s/%s' % ( re.sub(rc['get'], r'\1', str(info.attrs['href'])), str(title).replace(' ', '.'))) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
        'info': 'detail', 'get': 'download',
        'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|')}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
            search_url = self.urls['search'] % (self._categories_string(), search_string)

            html = self.get_url(search_url)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find('table', id='torrentsTable')
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers, size = [tryInt(n, n) for n in [
                                tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -3)]]
                            if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip()

                            download_url = self.urls['get'] % tr.find('a', href=rc['get']).get('href')
                        except (AttributeError, TypeError, ValueError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
            self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

    return results
def _doSearch(self, search_params):

    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if mode == 'RSS':
                searchURL = self.url + 'index.php?page=torrents&active=1&category=%s' % (';'.join(self.categories[mode]))
                logger.log(u"PublicHD cache update URL: " + searchURL, logger.DEBUG)
            else:
                searchURL = self.searchurl % (urllib.quote(unidecode(search_string)), ';'.join(self.categories[mode]))
                logger.log(u"Search string: " + searchURL, logger.DEBUG)

            html = self.getURL(searchURL)
            if not html:
                continue

            try:
                soup = BeautifulSoup(html, features=["html5lib", "permissive"])

                torrent_table = soup.find('table', attrs={'id': 'torrbg'})
                torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                # Continue only if at least one release is found
                if len(torrent_rows) < 2:
                    logger.log(u"The data returned from " + self.name + " does not contain any torrents", logger.DEBUG)
                    continue

                for tr in torrent_rows[1:]:

                    try:
                        link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
                        title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
                        url = tr.find(href=re.compile('magnet+'))['href']
                        seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
                        leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
                    except (AttributeError, TypeError):
                        continue

                    if mode != 'RSS' and seeders == 0:
                        continue

                    if not title or not url:
                        continue

                    item = title, url, link, seeders, leechers

                    items[mode].append(item)

            except Exception, e:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)

        results += items[mode]

    return results
def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0):

    results = []
    if not self._do_login():
        return results

    items = {'Season': [], 'Episode': [], 'Cache': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            search_url = self.urls['search'] % (search_string, self.categories)
            html = self.get_url(search_url)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find('table', attrs={'border': '1'})
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers = [int(tr.find_all('td')[x].get_text().strip()) for x in (-2, -1)]
                            if 'Cache' != mode and (seeders < self.minseed or leechers < self.minleech):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip()

                            download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                        except (AttributeError, TypeError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders))

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
            self._log_result(mode, len(items[mode]) - cnt, search_url)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[2], reverse=True)

        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] % (self._categories_string(), ('3', '0')[not self.freeleech], (self.urls['search'] % search_string, '')['Cache' == mode]) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace('<?xml version="1.0" encoding="iso-8859-1"?>', '') html = re.sub(r'(</td>)[^<]*</td>', r'\1', html) html = re.sub(r'(<a[^<]*)<a[^<]*?href=details[^<]*', r'\1', html) with BS4Parser(html, 'html.parser') as soup: shows_found = False torrent_rows = soup.find_all('tr') for index, row in enumerate(torrent_rows): if 'type' == row.find_all('td')[0].get_text().strip().lower(): shows_found = index break if not shows_found or 2 > (len(torrent_rows) - shows_found): raise generic.HaltParseException head = None for tr in torrent_rows[1 + shows_found:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row(torrent_rows[shows_found]) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download', 'nuked': 'nuke', 'filter': 'free'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string(mode, '%s', ',')) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive'], attr='cellpadding="5"') as soup: torrent_table = soup.find('table', class_='browse') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: info = tr.find('a', href=rc['info']) head = head if None is not head else self._header_row(tr) seeders, leechers, size = [n for n in [ cells[head[x]].get_text().strip() for x in 'leech', 'leech', 'size']] seeders, leechers, size = [tryInt(n, n) for n in list(re.findall('^(\d+)[^\d]+?(\d+)', leechers)[0]) + re.findall('^[^\n\t]+', size)] if self._reject_item(seeders, leechers, self.freeleech and (not tr.find('a', class_=rc['filter'])), self.confirmed and (any([tr.find('img', alt=rc['nuked']), tr.find('img', class_=rc['nuked'])]))): continue title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download', 'fl': '\(Freeleech\)'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string(mode), ('3', '0')[not self.freeleech]) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace('<table width=100% border=0 align=center cellpadding=0 cellspacing=0>', '') html = re.sub(r'(?s)(.*)(<table[^>]*?950[^>]*>.*)(</body>)', r'\1\3', html) html = re.sub(r'(?s)<table[^>]+font[^>]+>', '<table id="parse">', html) html = re.sub(r'(?s)(<td[^>]+>(?!<[ab]).*?)(?:(?:</[ab]>)+)', r'\1', html) html = re.sub(r'(?m)^</td></tr></table>', r'', html) with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="parse"') as soup: torrent_table = soup.find('table', id='parse') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text().split()[0]).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _doSearch(self, search_params, show=None):

    results = []
    items = {"Season": [], "Episode": []}

    if not self._doLogin():
        return []

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            searchURL = self.urls["search"] % (unidecode(search_string), self.categories)

            logger.log(u"Search string: " + searchURL, logger.DEBUG)

            data = self.getURL(searchURL)
            if not data:
                continue

            try:
                html = BeautifulSoup(data, features=["html5lib", "permissive"])

                torrent_table = html.find("table", attrs={"id": "torrenttable"})
                torrent_rows = torrent_table.find_all("tr") if torrent_table else []

                if not torrent_rows:
                    # logger.log(u"No results found for: " + search_string + "(" + searchURL + ")", logger.DEBUG)
                    continue

                for result in torrent_table.find_all("tr")[1:]:

                    link = result.find("td", attrs={"class": "name"}).find("a")
                    url = result.find("td", attrs={"class": "quickdownload"}).find("a")

                    title = link.string
                    download_url = self.urls["download"] % url["href"]
                    id = int(link["href"].replace("/torrent/", ""))
                    seeders = int(result.find("td", attrs={"class": "seeders"}).string)
                    leechers = int(result.find("td", attrs={"class": "leechers"}).string)

                    # Filter unseeded torrents
                    if seeders == 0 or not title or not download_url:
                        continue

                    item = title, download_url, id, seeders, leechers
                    logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)

                    items[mode].append(item)

            except Exception, e:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)

        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'view', 'get': 'download', 'name': 'showname', 'nuked': 'nuked'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % search_string # fetches 15 results by default, and up to 100 if allowed in user profile html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', class_='torrent_table') torrent_rows = [] if torrent_table: torrent_rows = torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or tr.find('img', alt=rc['nuked']): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): continue title = tr.find('a', title=rc['info']).get_text().strip() if title.lower().startswith('season '): title = '%s %s' % (tr.find('div', class_=rc['name']).get_text().strip(), title) download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': '(^(info|torrent)/|/[\w+]{40,}\s*$)', 'get': '^magnet:'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string if 'Cache' == mode: search_url = self.urls['search'] % tuple(search_string.split(',')) else: search_url = self.urls['search'] % (search_string.replace('.', ' '), '') html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', attrs={'class': ['table', 'is-striped']}) torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): continue info = tr.select( '[alt*="magnet"], [title*="magnet"], [alt*="torrent"], [title*="torrent"]')[0] \ or tr.find('a', href=rc['info']) title = re.sub('\s(using|use|magnet|link)', '', ( info.attrs.get('title') or info.attrs.get('alt') or info.get_text())).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def logged_in(self, y):

    if all([None is y or 'logout' in y,
            bool(filter(lambda c: 'remember_web_' in c, self.session.cookies.keys()))]):
        if None is not y:
            self.shows = dict(re.findall('<option value="(\d+)">(.*?)</option>', y))
            h = HTMLParser()
            for k, v in self.shows.items():
                self.shows[k] = sanitizeSceneName(h.unescape(unidecode(v.decode('utf-8'))))
        return True
    return False
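# A self-contained sketch of the <option> scraping done in logged_in(); the markup and
# show names below are made up, and the sanitizeSceneName() step is omitted here.
import re
from HTMLParser import HTMLParser  # Python 2 stdlib
from unidecode import unidecode

page = u'<option value="1">Les Revenants</option><option value="2">Caf&eacute; TV</option>'
shows = dict(re.findall('<option value="(\d+)">(.*?)</option>', page))
h = HTMLParser()
for k, v in shows.items():
    shows[k] = unidecode(h.unescape(v))
# shows -> {u'1': 'Les Revenants', u'2': 'Cafe TV'}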
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': '[\.\s]+'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] % (self.user_authkey, self.user_passkey) if 'Cache' != mode: search_url += self.urls['search'] % rc['nodots'].sub('+', search_string) data_json = self.get_url(search_url, json=True) if self.should_skip(): return results cnt = len(items[mode]) try: for item in data_json.get('response', {}).get('results', []): if self.freeleech and not item.get('isFreeleech'): continue seeders, leechers, group_name, torrent_id, size = [tryInt(n, n) for n in [item.get(x) for x in [ 'seeders', 'leechers', 'groupName', 'torrentId', 'size']]] if self._reject_item(seeders, leechers): continue try: title_parts = group_name.split('[') maybe_res = re.findall('((?:72|108|216)0\w)', title_parts[1]) maybe_ext = re.findall('(?i)(%s)' % '|'.join(common.mediaExtensions), title_parts[1]) detail = title_parts[1].split('/') detail[1] = detail[1].strip().lower().replace('mkv', 'x264') title = '%s.%s' % (BS4Parser(title_parts[0].strip(), 'html.parser').soup.string, '.'.join( (maybe_res and [maybe_res[0]] or []) + [detail[0].strip(), detail[1], maybe_ext and maybe_ext[0].lower() or 'mkv'])) except (IndexError, KeyError): title = self.regulate_title(item, group_name) download_url = self.urls['get'] % (self.user_authkey, self.user_passkey, torrent_id) if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except (StandardError, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'details', 'get': 'download', 'size': '(.*)\n.*'}.items()) for mode in search_params.keys(): rc['cats'] = re.compile('(?i)cat=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string html = self.get_url(self.urls['search'] % ('+'.join(search_string.split()), self._categories_string(mode))) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', id='torrents-table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr, {'seed': r'(?:up\.png|seed|s/l)', 'leech': r'(?:down\.png|leech|peers)'}) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if not tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers): continue title = tr.find('a', href=rc['info']).get_text().strip() size = rc['size'].sub(r'\1', size) download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):

    logger.log(u"_doSearch started with ..." + str(search_params), logger.DEBUG)

    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            searchURL = self.urls['search'] % (urllib.quote(search_string), self.categories)

            logger.log(u"Search string: " + searchURL, logger.DEBUG)

            data = self.getURL(searchURL)
            if not data:
                continue

            try:
                with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                    result_linkz = html.findAll('a', href=re.compile("torrents-details"))

                    if not result_linkz:
                        logger.log(u"The data returned from " + self.name + " does not contain any torrents",
                                   logger.DEBUG)
                        continue

                    for link in result_linkz:
                        title = link.text
                        logger.log(u"BLUETIGERS TITLE TEMP: " + title, logger.DEBUG)

                        download_url = self.urls['base_url'] + "/" + link['href']
                        download_url = download_url.replace("torrents-details", "download")
                        logger.log(u"BLUETIGERS downloadURL: " + download_url, logger.DEBUG)

                        if not title or not download_url:
                            continue

                        item = title, download_url
                        logger.log(u"Found result: " + title.replace(' ', '.') + " (" + download_url + ")",
                                   logger.DEBUG)

                        items[mode].append(item)

            except Exception, e:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download', 'filter': 'fa-(?:heart|star)'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'cats2[]=%s')) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', class_='yenitorrenttable') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr, custom_tags=[('span', 'data-original-title')]) seeders, leechers, size = [n for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers, self.freeleech and ( not tr.find('i', class_=rc['filter']))): continue title = tr.find('a', href=rc['info']).get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'view', 'get': 'download'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, ('&freetorrent=1', '')[not self.freeleech]) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find(id='torrent_table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers, self.freeleech and ( any([not tr.select('.tl_free'), tr.select('.tl_timed'), tr.select('[title^="Timed Free"]'), tr.select('.tl_expired'), tr.select('[title^="Expired Free"]')]))): continue title = tr.find('a', title=rc['info']).get_text().strip() download_url = self._link(tr.find('a', title=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'get': 'download'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_url = self.urls[('search', 'browse')['Cache' == mode]] % {
                'cats': self._categories_string(mode, '', ','),
                'query': isinstance(search_string, unicode) and unidecode(search_string) or search_string}

            html = self.get_url(search_url)
            if self.should_skip():
                return results

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find(id='torrenttable')
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in torrent_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers = [tryInt(n) for n in [
                                tr.find('td', class_=x).get_text().strip() for x in ('seeders', 'leechers')]]
                            if self._reject_item(seeders, leechers):
                                continue

                            info = tr.find('td', class_='name').a
                            title = (info.attrs.get('title') or info.get_text()).strip()
                            size = cells[head['size']].get_text().strip()
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (AttributeError, TypeError, ValueError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            # the handlers/log/sort below follow the same tail used by the sibling
            # provider methods in this module; the source snippet ends at the append above
            except generic.HaltParseException:
                pass
            except (StandardError, Exception):
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

    return results
def _search_provider(self, search_params, **kwargs): results = [] if self.show and not self.show.is_anime: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'view', 'get': '(?:torrent|magnet:)'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % ((0, 2)[self.confirmed], search_string.replace('.', ' ')) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', class_='torrent-list') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): continue title = tr.find('a', href=rc['info']).get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'view', 'get': 'download', 'nodots': '[\.\s]+'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % ( self._categories_string(mode, 'filter_cat[%s]=1'), rc['nodots'].sub('+', search_string)) html = self.get_url(search_url) cnt = len(items[mode]) try: if not html or self._has_no_results(html) or 'Translation: No search results' in html: raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find(id='torrent_table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers, self.freeleech and (not bool( re.search('(?i)>\s*Freeleech!*\s*<', cells[1].encode(formatter='minimal'))))): continue title = self.regulate_title(tr.find('a', title=rc['info']).get_text().strip()) download_url = self._link(tr.find('a', title=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'list': '.*?torrent_all', 'info': 'details', 'key': 'key=([^"]+)">Torrent let'}.iteritems()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % search_string # fetches 15 results by default, and up to 100 if allowed in user profile html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('div', class_=rc['list']) torrent_rows = [] if not torrent_table else torrent_table.find_all('div', class_='box_torrent') key = rc['key'].findall(html)[0] if not len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows: try: seeders, leechers, size = [tryInt(n, n) for n in [ tr.find('div', class_=x).get_text().strip() for x in 'box_s2', 'box_l2', 'box_meret2']] if self._reject_item(seeders, leechers): continue anchor = tr.find('a', href=rc['info']) title = (anchor.get('title') or anchor.get_text()).strip() download_url = self._link(anchor.get('href').replace('details', 'download')) + key except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def slugify(text, separator="-"):
    ret = ""
    if text:
        for c in unidecode(text).lower():
            try:
                # map characters that have a named HTML entity to that name,
                # e.g. u'\xe9' -> 'eacute'; plain ASCII raises KeyError and is kept as-is
                ret += htmlentitydefs.codepoint2name[ord(c)]
            except KeyError:
                ret += c

        # strip the entity suffixes back off ('eacute' -> 'e'), then collapse
        # non-word characters and runs of spaces into the separator
        ret = re.sub("([a-zA-Z])(uml|acute|grave|circ|tilde|cedil)", r"\1", ret)
        ret = re.sub("\W", " ", ret)
        ret = re.sub(" +", separator, ret)

    return ret.strip()
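# Illustrative calls for slugify(); inputs are made-up examples, outputs follow from
# the transliteration and separator steps above.
print slugify(u"Cr\xe8me Br\xfbl\xe9e")   # u"Crème Brûlée" -> "creme-brulee"
print slugify("Hello, World", "_")        # -> "hello_world"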
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} if not self._doLogin(): return results data = [] searchURLS = [] for mode in search_params.keys(): for search_string in search_params[mode]: if isinstance(search_string, unicode): search_string = unidecode(search_string) if mode == 'Season' and search_mode == 'sponly': searchURLS += [self.urls['archive'] % (urllib.quote(search_string))] else: searchURLS += [self.urls['search'] % (urllib.quote(search_string), self.categories)] searchURLS += [self.urls['nonscene'] % (urllib.quote(search_string))] searchURLS += [self.urls['foreign'] % (urllib.quote(search_string))] for searchURL in searchURLS: logger.log(u"Search string: " + searchURL, logger.DEBUG) try: data += [x for x in [self.getURL(searchURL)] if x] except Exception as e: logger.log(u"Unable to fetch data reason: {0}".format(str(e)), logger.WARNING) if not len(data): continue try: for dataItem in data: with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html: torrent_table = html.find('table', attrs={'id': 'torrents-table'}) torrent_rows = torrent_table.find_all('tr') if torrent_table else [] #Continue only if at least one Release is found if len(torrent_rows) < 2: if html.title: source = self.name + " (" + html.title.string + ")" else: source = self.name logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG) continue for result in torrent_table.find_all('tr')[1:]: try: link = result.find('td', attrs={'class': 'ttr_name'}).find('a') all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2) # Foreign section contain two links, the others one if self._isSection('Foreign', dataItem): url = all_urls[1] else: url = all_urls[0] title = link.string if re.search('\.\.\.', title): data = self.getURL(self.url + "/" + link['href']) if data: with BS4Parser(data) as details_html: title = re.search('(?<=").+(?<!")', details_html.title.string).group(0) download_url = self.urls['download'] % url['href'] id = int(link['href'].replace('details?id=', '')) seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string) leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string) except (AttributeError, TypeError): continue if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech): continue if not title or not download_url: continue item = title, download_url, id, seeders, leechers logger.log(u"Found result: " + title.replace(' ','.') + " (" + searchURL + ")", logger.DEBUG) items[mode].append(item) # for each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode] except Exception, e: logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) continue
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} freeleech = '&free=on' if self.freeleech else '' if not self._doLogin(): return [] for mode in search_params.keys(): for search_string in search_params[mode]: # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile searchURL = self.urls['search'] % (self.categorie, freeleech, unidecode(search_string)) searchURL += ';o=seeders' if mode != 'RSS' else '' logger.log(u"" + self.name + " search page URL: " + searchURL, logger.DEBUG) data = self.getURL(searchURL) if not data: continue try: data = re.sub(r'<button.+?<[\/]button>', '', data, 0, re.IGNORECASE | re.MULTILINE) with BS4Parser(data, features=["html5lib", "permissive"]) as html: if not html: logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG) continue if html.find(text='No Torrents Found!'): logger.log( u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG) continue torrent_table = html.find('table', attrs={'class': 'torrents'}) torrents = torrent_table.find_all( 'tr') if torrent_table else [] #Continue only if one Release is found if len(torrents) < 2: logger.log( u"The Data returned from " + self.name + " do not contains any torrent", logger.WARNING) continue for result in torrents[1:]: try: torrent = result.find_all('td')[1].find('a') torrent_name = torrent.string torrent_download_url = self.urls['base_url'] + ( result.find_all('td')[3].find('a'))['href'] torrent_details_url = self.urls[ 'base_url'] + torrent['href'] torrent_seeders = int( result.find('td', attrs={ 'class': 'ac t_seeders' }).string) ## Not used, perhaps in the future ## #torrent_id = int(torrent['href'].replace('/details.php?id=', '')) #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string) except (AttributeError, TypeError): continue # Filter unseeded torrent and torrents with no name/url if mode != 'RSS' and torrent_seeders == 0: continue if not torrent_name or not torrent_download_url: continue item = torrent_name, torrent_download_url logger.log( u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG) items[mode].append(item) except Exception, e: logger.log( u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) results += items[mode]
def _doSearch(self, search_params, show=None, age=None): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} for mode in search_params.keys(): for search_string in search_params[mode]: if mode == 'RSS': searchURL = self.url + 'index.php?page=torrents&active=1&category=%s' % ( ';'.join(self.categories[mode])) logger.log(u"PublicHD cache update URL: " + searchURL, logger.DEBUG) else: searchURL = self.searchurl % (urllib.quote( unidecode(search_string)), ';'.join( self.categories[mode])) logger.log(u"Search string: " + searchURL, logger.DEBUG) html = self.getURL(searchURL) #remove unneccecary <option> lines which are slowing down BeautifulSoup optreg = re.compile(r'<option.*</option>') html = os.linesep.join( [s for s in html.splitlines() if not optreg.search(s)]) if not html: continue try: soup = BeautifulSoup(html, features=["html5lib", "permissive"]) torrent_table = soup.find('table', attrs={'id': 'torrbg'}) torrent_rows = torrent_table.find_all( 'tr') if torrent_table else [] #Continue only if one Release is found if len(torrent_rows) < 2: logger.log( u"The Data returned from " + self.name + " do not contains any torrent", logger.DEBUG) continue for tr in torrent_rows[1:]: try: link = self.url + tr.find(href=re.compile( 'page=torrent-details'))['href'] title = tr.find( lambda x: x.has_attr('title')).text.replace( '_', '.') url = tr.find(href=re.compile('magnet+'))['href'] seeders = int( tr.find_all('td', {'class': 'header'})[4].text) leechers = int( tr.find_all('td', {'class': 'header'})[5].text) except (AttributeError, TypeError): continue if mode != 'RSS' and seeders == 0: continue if not title or not url: continue item = title, url, link, seeders, leechers items[mode].append(item) except Exception, e: logger.log( u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) #For each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode]
def _doSearch(self, search_params, show=None, age=None): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} for mode in search_params.keys(): for search_string in search_params[mode]: if mode != 'RSS': searchURL = self.searchurl % (urllib.quote( unidecode(search_string))) logger.log(u"Search string: " + searchURL, logger.DEBUG) else: searchURL = self.url + 'tv/?field=time_add&sorder=desc' logger.log(u"KAT cache update URL: " + searchURL, logger.DEBUG) html = self.getURL(searchURL) if not html: continue try: soup = BeautifulSoup(html, features=["html5lib", "permissive"]) torrent_table = soup.find('table', attrs={'class': 'data'}) torrent_rows = torrent_table.find_all( 'tr') if torrent_table else [] #Continue only if one Release is found if len(torrent_rows) < 2: logger.log( u"The Data returned from " + self.name + " do not contains any torrent", logger.WARNING) continue for tr in torrent_rows[1:]: try: link = urlparse.urljoin(self.url, (tr.find( 'div', { 'class': 'torrentname' }).find_all('a')[1])['href']) id = tr.get('id')[-7:] title = (tr.find('div', { 'class': 'torrentname' }).find_all('a')[1]).text url = tr.find('a', 'imagnet')['href'] verified = True if tr.find('a', 'iverify') else False trusted = True if tr.find( 'img', {'alt': 'verified'}) else False seeders = int(tr.find_all('td')[-2].text) leechers = int(tr.find_all('td')[-1].text) except (AttributeError, TypeError): continue if mode != 'RSS' and seeders == 0: continue if sickbeard.KAT_VERIFIED and not verified: logger.log( u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it", logger.DEBUG) continue #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent if mode == 'Season': ep_number = int( len(search_params['Episode']) / len(set(allPossibleShowNames(self.show)))) title = self._find_season_quality( title, link, ep_number) if not title or not url: continue item = title, url, id, seeders, leechers items[mode].append(item) except Exception, e: logger.log( u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) #For each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode]
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} self.categories = "cat=" + str(self.cat) if not self._doLogin(): return [] for mode in search_params.keys(): for search_string in search_params[mode]: if isinstance(search_string, unicode): search_string = unidecode(search_string) last_page = 0 y = int(self.page) if search_string == '': continue search_string = str(search_string).replace('.', ' ') for x in range(0, y): z = x * 20 if last_page: break logger.log(u"Page: " + str(x) + " of " + str(y), logger.DEBUG) if mode != 'RSS': searchURL = (self.urls['search_page'] + '&filter={2}').format( z, self.categories, search_string) else: searchURL = self.urls['search_page'].format( z, self.categories) logger.log(u"Search string: " + searchURL, logger.DEBUG) data = self.getURL(searchURL) if not data: logger.log(u"data is empty", logger.DEBUG) continue try: with BS4Parser(data, features=["html5lib", "permissive"]) as html: torrent_table = html.find( 'table', attrs={'class': 'copyright'}) torrent_rows = torrent_table.find_all( 'tr') if torrent_table else [] #Continue only if one Release is found logger.log( u"Num of Row: " + str(len(torrent_rows)), logger.DEBUG) # if len(torrent_rows) == 0: # # self._uid = "" # self._hash = "" # self._session_id = "" # # if not self._doLogin(): # return [] # # continue if len(torrent_rows) < 3: logger.log( u"The Data returned from " + self.name + " do not contains any torrent", logger.DEBUG) last_page = 1 continue if len(torrent_rows) < 42: last_page = 1 for result in torrent_table.find_all('tr')[2:]: try: link = result.find('td').find('a') title = link.string id = ((result.find_all('td')[8].find('a') )['href'])[-8:] download_url = self.urls['download'] % (id) leechers = result.find_all( 'td')[3].find_all('td')[1].text leechers = int(leechers.strip('[]')) seeders = result.find_all( 'td')[3].find_all('td')[2].text seeders = int(seeders.strip('[]')) except (AttributeError, TypeError): continue if mode != 'RSS' and ( seeders < self.minseed or leechers < self.minleech): continue if not title or not download_url: continue title = title.replace(" 720p", "").replace( " Versione 720p", "").replace( " Versione 1080p", "") + self._reverseQuality( self._episodeQuality(result)) item = title, download_url, id, seeders, leechers logger.log( u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG) if not self._is_italian( result) and not self.subtitle: logger.log(u"Subtitled, Skipped", logger.DEBUG) continue else: logger.log( u"Not Subtitled or Forced, Got It!", logger.DEBUG) items[mode].append(item) except Exception, e: logger.log( u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) #For each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode]
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'list': '.*?torrent_all', 'info': 'details', 'key': 'key=([^"]+)">Torrent let' }.iteritems()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % search_string # fetches 15 results by default, and up to 100 if allowed in user profile html = self.get_url(search_url) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('div', class_=rc['list']) torrent_rows = [] if not torrent_table else torrent_table.find_all( 'div', class_='box_torrent') key = rc['key'].findall(html)[0] if not len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows: try: seeders, leechers, size = [ tryInt(n, n) for n in [ tr.find('div', class_=x).get_text().strip() for x in 'box_s2', 'box_l2', 'box_meret2' ] ] if self._peers_fail(mode, seeders, leechers): continue anchor = tr.find('a', href=rc['info']) title = (anchor.get('title') or anchor.get_text()).strip() download_url = self._link( anchor.get('href').replace( 'details', 'download')) + key except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def html(self, mode, search_string, results): if 'Content-Type' in self.session.headers: del (self.session.headers['Content-Type']) setattr( self.session, 'reserved', { 'headers': { 'Accept': 'text/html, application/xhtml+xml, */*', 'Accept-Language': 'en-GB', 'Cache-Control': 'no-cache', 'Referer': 'https://broadcasthe.net/login.php', 'User-Agent': self.ua } }) self.headers = None if self.auth_html or self._authorised_html(): del (self.session.reserved['headers']['Referer']) if 'Referer' in self.session.headers: del (self.session.headers['Referer']) self.auth_html = True search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (search_string, self._categories_string( mode, 'filter_cat[%s]=1')) html = self.get_url(search_url, use_tmr_limit=False) if self.should_skip(log_warning=False, use_tmr_limit=False): return results cnt = len(results) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as soup: tbl = soup.find(id='torrent_table') tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'cats': r'(?i)cat\[(?:%s)\]' % self._categories_string( mode, template='', delimiter='|'), 'get': 'download' }.items()) head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size' ] ] if not tr.find( 'a', href=rc['cats']) or self._reject_item( seeders, leechers, container=self.reject_m2ts and (re.search(r'(?i)\[.*?m2?ts.*?\]', tr.get_text('', strip=True)))): continue title = tr.select('td span[title]')[0].attrs.get( 'title').strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): continue if title and download_url: results.append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download', 'fl': 'free' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_string = re.sub('(?i)[^a-z0-9\s]', '%', unquote_plus(search_string)) kwargs = dict( post_data={ 'keywords': search_string, 'do': 'quick_sort', 'page': '0', 'category': '0', 'search_type': 't_name', 'sort': 'added', 'order': 'desc', 'daysprune': '-1' }) vals = [i for i in range(5, 16)] random.SystemRandom().shuffle(vals) attempts = html = soup = torrent_table = None fetch = 'failed fetch' for attempts, s in enumerate((0, vals[0], vals[5], vals[10])): time.sleep(s) html = self.get_url(self.urls['search'], **kwargs) if self.should_skip(): return results if html: soup = BeautifulSoup(html, 'html.parser') torrent_table = soup.find('table', id='sortabletable') if torrent_table: fetch = 'data fetched' break if attempts: logger.log('%s %s after %s attempts' % (mode, fetch, attempts + 1)) cnt = len(items[mode]) try: if not html or self._has_no_results( html) or not torrent_table: raise generic.HaltParseException torrent_rows = torrent_table.find_all('tr') get_detail = True if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size' ] ] if self._reject_item( seeders, leechers, self.freeleech and (None is cells[1].find( 'img', title=rc['fl']))): continue info = tr.find('a', href=rc['info']) title = (tr.find( 'div', class_='tooltip-content').get_text() or info.get_text()).strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] download_url = self._link( tr.find('a', href=rc['get'])['href']) except (StandardError, Exception): continue if get_detail and title.endswith('...'): try: with BS4Parser( self.get_url('%s%s' % ( self. urls['config_provider_home_uri'], info['href'].lstrip('/').replace( self.urls[ 'config_provider_home_uri'], ''))), 'html.parser') as soup_detail: title = soup_detail.find( 'td', class_='thead', attrs={ 'colspan': '3' }).get_text().strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] except IndexError: continue except (StandardError, Exception): get_detail = False try: titles = self.regulate_title( title, mode, search_string) if download_url and titles: for title in titles: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except (StandardError, Exception): pass except generic.HaltParseException: pass
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'details', 'get': 'dl.php', 'snatch': 'snatches', 'seeders': r'(^\d+)', 'leechers': r'(\d+)$' }.items()) log = '' if self.filter: non_marked = 'f0' in self.filter # if search_any, use unselected to exclude, else use selected to keep filters = ([f for f in self.may_filter if f in self.filter], [f for f in self.may_filter if f not in self.filter])[non_marked] rc['filter'] = re.compile('(?i)(%s)' % '|'.join([ self.may_filter[f][2] for f in filters if self.may_filter[f][1] ])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join( [self.may_filter[f][0] for f in filters])) for mode in search_params.keys(): rc['cats'] = re.compile( '(?i)cat=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % ('+'.join( search_string.split()), self._categories_string(mode)) html = self.get_url(search_url) if self.should_skip(): return results time.sleep(2) if not self.has_all_cookies(['session_key']): if not self._authorised(): return results html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', id='tortable') torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue if any(self.filter): marker = '' try: marker = tr.select('a[href^="browse"] .tip' )[0].get_text().strip() except (StandardError, Exception): pass if ((non_marked and rc['filter'].search(marker)) or (not non_marked and not rc['filter'].search(marker))): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers = 2 * [ cells[head['seed']].get_text().strip() ] seeders, leechers = [ tryInt(n) for n in [ rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0] ] ] if self._peers_fail(mode, seeders, leechers) or\ not rc['cats'].findall(tr.find('td').get('onclick', ''))[0]: continue title = tr.find( 'a', href=rc['info']).get_text().strip() snatches = tr.find( 'a', href=rc['snatch']).get_text().strip() size = cells[ head['size']].get_text().strip().replace( snatches, '') download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (StandardError, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + self.session.response.get('url')) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] % ( self._categories_string(), ('3', '0')[not self.freeleech], (self.urls['search'] % search_string, '')['Cache' == mode]) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace( '<?xml version="1.0" encoding="iso-8859-1"?>', '') html = re.sub(r'(</td>)[^<]*</td>', r'\1', html) html = re.sub(r'(<a[^<]*)<a[^<]*?href=details[^<]*', r'\1', html) with BS4Parser(html, 'html.parser') as soup: shows_found = False torrent_rows = soup.find_all('tr') for index, row in enumerate(torrent_rows): if 'type' == row.find_all( 'td')[0].get_text().strip().lower(): shows_found = index break if not shows_found or 2 > (len(torrent_rows) - shows_found): raise generic.HaltParseException head = None for tr in torrent_rows[1 + shows_found:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row( torrent_rows[shows_found]) seeders, leechers, size = [ tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size' ] ] if self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, search_mode='eponly', epcount=0, **kwargs): results = [] if not self.url: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*', 'verify': '(?:helper|moderator|trusted|vip)', 'size': 'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] if 'Cache' == mode \ else self.urls['search'] % (urllib.quote(search_string)) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): self._url = None raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="searchResult"') as soup: torrent_table = soup.find(id='searchResult') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_table.find_all('tr')[1:]: cells = tr.find_all('td') if 3 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers = [tryInt(cells[head[x]].get_text().strip()) for x in 'seed', 'leech'] if self._reject_item(seeders, leechers): continue info = tr.find('a', title=rc['info']) title = info.get_text().strip().replace('_', '.') tid = rc['tid'].sub(r'\1', str(info['href'])) download_magnet = tr.find('a', title=rc['get'])['href'] except (AttributeError, TypeError, ValueError): continue if self.confirmed and not tr.find('img', title=rc['verify']): logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG) continue # Check number video files = episode in season and # find the real Quality for full season torrent analyzing files in torrent if 'Season' == mode and 'sponly' == search_mode: ep_number = int(epcount / len(set(show_name_helpers.allPossibleShowNames(self.show)))) title = self._find_season_quality(title, tid, ep_number) if title and download_magnet: size = None try: size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0] except (StandardError, Exception): pass items[mode].append((title, download_magnet, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (StandardError, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
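Several of the parsers above append `self._bytesizer(size)` to the result tuple, but that helper is not shown in this file. The following is a minimal sketch, assuming it only needs to turn scraped strings such as '1.2 GB' or '700 MiB' into an integer byte count; the name `bytesizer`, its signature, and the unit table are illustrative assumptions, not the providers' actual implementation.

import re

# Hypothetical stand-in for the _bytesizer() helper used by the parsers above.
# Assumed behaviour: parse a human readable size string into bytes, returning
# None when nothing parseable is found.
_UNITS = {'b': 1, 'kb': 1024, 'mb': 1024 ** 2, 'gb': 1024 ** 3, 'tb': 1024 ** 4}

def bytesizer(size_text):
    if not size_text:
        return None
    match = re.search(r'(?i)([\d.,]+)\s*([kmgt]?i?b)', size_text)
    if not match:
        return None
    value = float(match.group(1).replace(',', ''))
    unit = match.group(2).lower().replace('ib', 'b')  # treat GiB the same as GB in this sketch
    return int(value * _UNITS.get(unit, 1))

# Example: bytesizer('1.2 GB') -> 1288490188, bytesizer('700 MiB') -> 734003200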
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': '.*id=(\d+).*', 'fl': '\[freeleech\]', 'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|') }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string post_data = self.urls['params'].copy() post_data.update( ast.literal_eval('{%s}' % self._categories_string( template='"c%s": "1"', delimiter=','))) if 'Cache' != mode: search_string = '+'.join(search_string.split()) post_data['search'] = search_string if self.freeleech: post_data.update({'freeleech': 'on'}) self.session.headers.update({ 'Referer': self.url + 'browse.php', 'X-Requested-With': 'XMLHttpRequest' }) html = self.get_url(self.urls['browse'], post_data=post_data) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', attrs={'cellpadding': 5}) torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows[1:]: try: seeders, leechers, size = [ tryInt(n, n) for n in [ tr.find_all('td') [x].get_text().strip() for x in (-2, -1, -3) ] ] if None is tr.find('a', href=rc['cats'])\ or self.freeleech and None is rc['fl'].search(tr.find_all('td')[1].get_text())\ or self._peers_fail(mode, seeders, leechers): continue info = tr.find('a', href=rc['info']) title = 'title' in info.attrs and info.attrs[ 'title'] or info.get_text().strip() download_url = self.urls['get'] % { 'id': re.sub(rc['get'], r'\1', str(info.attrs['href'])), 'title': str(title).replace(' ', '.') } except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except Exception: logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, ('search string: ' + search_string, self.name)['Cache' == mode]) self._sort_seeders(mode, items) results = list(set(results + items[mode])) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download', 'fl': 'free' }.items()) for mode in search_params.keys(): save_url, restore = self._set_categories(mode) if self.should_skip(): return results for search_string in search_params[mode]: search_string = search_string.replace(u'£', '%') search_string = re.sub(r'[\s.]+', '%', search_string) search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string kwargs = dict( post_data={ 'keywords': search_string, 'do': 'quick_sort', 'page': '0', 'category': '0', 'search_type': 't_name', 'sort': 'added', 'order': 'desc', 'daysprune': '-1' }) html = self.get_url(self.urls['search'], **kwargs) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException parse_only = dict(table={ 'id': (lambda at: at and 'sortabletable' in at) }) with BS4Parser(html, parse_only=parse_only) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') get_detail = True if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size' ] ] if self._reject_item( seeders, leechers, self.freeleech and (None is cells[1].find( 'img', title=rc['fl']))): continue info = tr.find('a', href=rc['info']) title = (tr.find( 'div', class_='tooltip-content').get_text() or info.get_text()).strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] download_url = self._link( tr.find('a', href=rc['get'])['href']) except (BaseException, Exception): continue if get_detail and title.endswith('...'): try: with BS4Parser( self.get_url('%s%s' % ( self.urls[ 'config_provider_home_uri'], info['href'].lstrip('/'). replace( self.urls[ 'config_provider_home_uri'], '')))) as soup_detail: title = soup_detail.find( 'td', class_='thead', attrs={ 'colspan': '3' }).get_text().strip() title = re.findall( '(?m)(^[^\r\n]+)', title)[0] except IndexError: continue except (BaseException, Exception): get_detail = False title = self.regulate_title(title) if download_url and title: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def _doSearch(self, search_params):
    results = []
    items = {'Season': [], 'Episode': []}

    freeleech = '&free=on' if sickbeard.IPTORRENTS_FREELEECH else ''

    if not self._doLogin():
        return []

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
            searchURL = self.urls['search'] % (self.categorie, freeleech, unidecode(search_string)) + ';o=seeders'
            logger.log(u"" + self.name + " search page URL: " + searchURL, logger.DEBUG)

            data = self.getURL(searchURL)
            if not data:
                continue

            try:
                html = BeautifulSoup(data, features=["html5lib", "permissive"])
                if not html:
                    logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
                    continue

                if html.find(text='Nothing found!'):
                    logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
                    continue

                torrent_table = html.find('table', attrs={'class': 'torrents'})
                torrents = torrent_table.find_all('tr') if torrent_table else []

                if not torrents:
                    # logger.log(u"The data returned from " + self.name + " is incomplete, this result is unusable", logger.DEBUG)
                    continue

                for result in torrents[1:]:
                    torrent = result.find_all('td')[1].find('a')
                    torrent_name = torrent.string
                    torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
                    torrent_details_url = self.urls['base_url'] + torrent['href']
                    torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
                    ## Not used, perhaps in the future ##
                    # torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
                    # torrent_leechers = int(result.find('td', attrs={'class': 'ac t_leechers'}).string)

                    # Filter unseeded torrents and torrents with no name/url
                    if torrent_seeders == 0 or not torrent_name or not torrent_download_url:
                        continue

                    item = torrent_name, torrent_download_url
                    logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
                    items[mode].append(item)

            except Exception as e:
                logger.log(u"Failed parsing " + self.name + (" Exceptions: " + str(e) if e else ''), logger.ERROR)

        results += items[mode]

    return results
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):
    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    if not self._doLogin():
        return results

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            searchURL = self.urls['search'] % (search_string, self.catagories)

            data = self.getURL(searchURL)
            if not data:
                continue

            try:
                with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                    torrent_table = html.find('table', attrs={'id': 'torrent_table'})
                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                    # Continue only if at least one release is found
                    if len(torrent_rows) < 2:
                        logger.log(u"The Data returned from " + self.name + " does not contain any torrents",
                                   logger.DEBUG)
                        continue

                    for result in torrent_rows[1:]:
                        cells = result.find_all('td')
                        link = result.find('a', attrs={'dir': 'ltr'})
                        url = result.find('a', attrs={'title': 'Download'})

                        try:
                            title = link.contents[0]
                            download_url = self.urls['download'] % (url['href'])
                            id = link['href'][-6:]
                            # cast to int so the minseed/minleech comparison below actually works
                            seeders = int(cells[len(cells) - 2].contents[0])
                            leechers = int(cells[len(cells) - 1].contents[0])
                        except (AttributeError, TypeError, ValueError):
                            continue

                        # Filter unseeded torrents
                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                            continue

                        if not title or not download_url:
                            continue

                        item = title, download_url, id, seeders, leechers
                        logger.log(u"Found result: " + title + " (" + searchURL + ")", logger.DEBUG)
                        items[mode].append(item)

            except Exception:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)
        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'abd': '(\d{4}(?:[.]\d{2}){2})', 'peers': 'Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)', 'info': '(\w+)[.]html' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_string = '+'.join(rc['abd'].sub(r'%22\1%22', search_string).split()) search_url = self.urls['search'] % ( search_string, self._categories_string(mode, '', ',')) html = self.get_url(search_url) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace('</a> </i>', '</a>').replace( '"href=', '" href=').replace('"style', '" style') with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', class_='table-torrents') torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row( tr, { 'peers': r'(?:zqf\-cloud)', 'size': r'(?:zqf\-files)' }) stats = rc['peers'].findall( (cells[head['peers']].find( class_='progress') or {}).get('title', '')) seeders, leechers = any(stats) and [ tryInt(x) for x in stats[0] ] or (0, 0) if self._peers_fail(mode, seeders, leechers): continue info = cells[1].find( 'a', href=rc['info']) or cells[0].find( 'a', href=rc['info']) title = info and info.get_text().strip() size = cells[head['size']].get_text().strip() download_url = info and ( self.urls['get'] % rc['info'].findall(info['href'])[0]) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (StandardError, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
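Nearly every row parser above wraps scraped cell text in `tryInt(n, n)` before using it. The helper is defined elsewhere in the code base; a rough sketch of the assumed contract (coerce to int, otherwise hand back the supplied default) follows, purely for illustration.

def tryInt(value, default=0):
    # Assumed behaviour of the tryInt() helper used throughout these parsers:
    # return int(value) when possible, otherwise return the default unchanged.
    try:
        return int(value)
    except (TypeError, ValueError):
        return default

# tryInt('42') -> 42
# tryInt('1.2 GB', '1.2 GB') -> '1.2 GB'  (size strings pass through for _bytesizer)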
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} for mode in search_params.keys(): for search_string in search_params[mode]: if isinstance(search_string, unicode): search_string = unidecode(search_string) if mode != 'RSS': searchURL = self.url + 'usearch/%s/?field=seeders&sorder=desc&rss=1' % urllib.quote_plus( search_string) else: searchURL = self.url + 'tv/?field=time_add&sorder=desc&rss=1' logger.log(u"Search string: " + searchURL, logger.DEBUG) try: entries = self.cache.getRSSFeed(searchURL)['entries'] for item in entries or []: try: link = item['link'] id = item['guid'] title = item['title'] url = item['torrent_magneturi'] verified = bool(int(item['torrent_verified']) or 0) seeders = int(item['torrent_seeds']) leechers = int(item['torrent_peers']) size = int(item['torrent_contentlength']) except (AttributeError, TypeError): continue if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech): continue if self.confirmed and not verified: logger.log( u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it", logger.DEBUG) continue #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent if mode == 'Season' and search_mode == 'sponly': ep_number = int( epcount / len(set(allPossibleShowNames(self.show)))) title = self._find_season_quality( title, link, ep_number) if not title or not url: continue try: pubdate = datetime.datetime( *item['published_parsed'][0:6]) except AttributeError: try: pubdate = datetime.datetime( *item['updated_parsed'][0:6]) except AttributeError: try: pubdate = datetime.datetime( *item['created_parsed'][0:6]) except AttributeError: try: pubdate = datetime.datetime( *item['date'][0:6]) except AttributeError: pubdate = datetime.datetime.today() item = title, url, id, seeders, leechers, size, pubdate items[mode].append(item) except Exception, e: logger.log( u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR) #For each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode]
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'get': 'download'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_url = self.urls[ ('search', 'browse')['Cache' == mode]] % { 'cats': self._categories_string(mode, '', ','), 'query': isinstance(search_string, unicode) and unidecode(search_string) or search_string } html = self.get_url(search_url) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find(id='torrenttable') torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers = [ tryInt(n) for n in [ tr.find('td', class_=x).get_text().strip() for x in 'seeders', 'leechers' ] ] if self._peers_fail(mode, seeders, leechers): continue info = tr.find('td', class_='name').a title = (info.attrs.get('title') or info.get_text()).strip() size = cells[head['size']].get_text().strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'details', 'get': 'download' }.items()) for mode in search_params.keys(): rc['cats'] = re.compile( '(?i)cat=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string html = self.get_url( self.urls['search'] % ('+'.join(search_string.split()), self._categories_string(mode, template='cats[]=%s'))) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_rows = soup.find_all('div', 'torrentrow') if not len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows: cells = tr.select('span[style*="cell"]') if 6 > len(cells): continue try: seeders, leechers, size = [ tryInt(n, n) for n in [ cells[x].get_text().strip() for x in -3, -2, -5 ] ] if self._peers_fail(mode, seeders, leechers) or not tr.find( 'a', href=rc['cats']): continue title = tr.find( 'a', href=rc['info']).get_text().strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0):
    results = []
    if not self._do_login():
        return results

    items = {'Season': [], 'Episode': [], 'Cache': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            search_url = self.urls['search'] % (search_string, self.categories)
            html = self.get_url(search_url)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, 'html.parser') as soup:
                    torrent_table = soup.find('table', attrs={'class': 'koptekst'})
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers = [int(tr.find_all('td')[x].get_text().strip()) for x in (-3, -2)]
                            if 'Cache' != mode and (seeders < self.minseed or leechers < self.minleech):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = info.get_text().strip()

                            download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                        except (AttributeError, TypeError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders))

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_result(mode, len(items[mode]) - cnt, search_url)

        # for each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[2], reverse=True)
        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': '.*?details\s*-\s*', 'get': 'download'}.items()) log = '' if self.filter: non_marked = 'f0' in self.filter # if search_any, use unselected to exclude, else use selected to keep filters = ([f for f in self.may_filter if f in self.filter], [f for f in self.may_filter if f not in self.filter])[non_marked] filters += (((all([x in filters for x in 'free', 'double']) and ['freedouble'] or []) + (all([x in filters for x in 'half', 'double']) and ['halfdouble'] or [])), ((not all([x not in filters for x in 'free', 'double']) and ['freedouble'] or []) + (not all([x not in filters for x in 'half', 'double']) and ['halfdouble'] or [])) )[non_marked] rc['filter'] = re.compile('(?i)^(%s)$' % '|'.join( ['%s' % f for f in filters if (f in self.may_filter and self.may_filter[f][1]) or f])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join( [f in self.may_filter and self.may_filter[f][0] or f for f in filters])) for mode in search_params.keys(): if mode in ['Season', 'Episode']: show_type = self.show.air_by_date and 'Air By Date' \ or self.show.is_sports and 'Sports' or self.show.is_anime and 'Anime' or None if show_type: logger.log(u'Provider does not carry shows of type: [%s], skipping' % show_type, logger.DEBUG) return results for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % ( '+'.join(search_string.split()), self._categories_string(mode, '')) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', class_='table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or (self.confirmed and tr.find('i', title=re.compile('(?i)unverified'))): continue if any(self.filter): marked = ','.join([x.attrs.get('title', '').lower() for x in tr.find_all( 'i', attrs={'class': ['fa-star', 'fa-diamond', 'fa-star-half-o']})]) munged = ''.join(filter(marked.__contains__, ['free', 'half', 'double'])) if ((non_marked and rc['filter'].search(munged)) or (not non_marked and not rc['filter'].search(munged))): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue title = rc['info'].sub('', tr.find('a', attrs={'title': rc['info']})['title']) download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size)))
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download', 'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|') }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % (self._categories_string(mode), search_string) html = self.get_url(search_url, timeout=self.url_timeout) cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('td', attrs={'class': 'colhead'}).find_parent('table') torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') if 2 > len(torrent_rows): raise generic.HaltParseException for tr in torrent_rows[1:]: try: info = tr.find('a', href=rc['info']) if not info: continue seeders, leechers, size = [tryInt(n, n) for n in [ (tr.find_all('td')[x].get_text().strip()) for x in (-2, -1, -4)]] if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers): continue title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip() download_url = self.urls['get'] % tr.find('a', href=rc['get']).get('href') except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except (generic.HaltParseException, AttributeError): pass except Exception: logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) self._sort_seeders(mode, items) results = list(set(results + items[mode])) return results
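The `_sort_seeders(mode, items)` call at the end of the previous function is assumed to do the same job as the inline `items[mode].sort(key=lambda tup: tup[3], reverse=True)` used by the older `_doSearch` variants, only with the seeder count at index 2 of the newer (title, download_url, seeders, size) tuples. A hedged sketch under that assumption:

def sort_seeders(mode, items):
    # Assumed equivalent of the providers' _sort_seeders() helper: order the
    # (title, url, seeders, size) tuples collected for a mode by seeder count,
    # highest first, so the best-seeded release is considered first.
    items[mode].sort(key=lambda item: item[2], reverse=True)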
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'torrent-details', 'get': 'download', 'peers': 'page=peers', 'nodots': '[\.\s]+' }.items()) log = '' if self.filter: non_marked = 'f0' in self.filter # if search_any, use unselected to exclude, else use selected to keep filters = ([f for f in self.may_filter if f in self.filter], [f for f in self.may_filter if f not in self.filter])[non_marked] rc['filter'] = re.compile('(?i)(%s).png' % '|'.join([ self.may_filter[f][2] for f in filters if self.may_filter[f][1] ])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join( [self.may_filter[f][0] for f in filters])) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] + self._categories_string( template='', delimiter=';') if 'Cache' != mode: search_url += self.urls['search'] % rc['nodots'].sub( ' ', search_string) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive'], attr='width="100%"\Wclass="lista"') as soup: torrent_table = soup.find_all('table', class_='lista')[-1] torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if (6 > len(cells) or tr.find('td', class_='header') or (any(self.filter) and ((non_marked and tr.find('img', src=rc['filter'])) or (not non_marked and not tr.find('img', src=rc['filter']))))): continue downlink = tr.find('a', href=rc['get']) if None is downlink: continue try: head = head if None is not head else self._header_row( tr) seeders, leechers = [ tryInt(x.get_text().strip()) for x in tr.find_all('a', href=rc['peers']) ] if self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() size = cells[head['size']].get_text().strip() download_url = self._link(downlink['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (StandardError, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _doSearch(self, search_params, show=None):
    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    if not self._doLogin():
        return []

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            searchURL = self.urls['search'] % (search_string, self.categories)
            logger.log(u"Search string: " + searchURL, logger.DEBUG)

            data = self.getURL(searchURL)
            if not data:
                continue

            try:
                html = BeautifulSoup(data, features=["html5lib", "permissive"])

                torrent_table = html.find('table', attrs={'id': 'torrents-table'})
                torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                # Continue only if at least one release is found
                if len(torrent_rows) < 2:
                    logger.log(u"The Data returned from " + self.name + " does not contain any torrents",
                               logger.DEBUG)
                    continue

                for result in torrent_table.find_all('tr')[1:]:
                    try:
                        link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
                        url = result.find('td', attrs={'class': 'td_dl'}).find('a')
                        title = link.string
                        download_url = self.urls['download'] % url['href']
                        id = int(link['href'].replace('details?id=', ''))
                        seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
                        leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
                    except (AttributeError, TypeError):
                        continue

                    # filter unseeded torrents and rows without a name/url
                    if mode != 'RSS' and seeders == 0:
                        continue
                    if not title or not download_url:
                        continue

                    item = title, download_url, id, seeders, leechers
                    logger.log(u"Found result: " + title + " (" + searchURL + ")", logger.DEBUG)
                    items[mode].append(item)

            except Exception:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)
        results += items[mode]

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download\.', 'fl': '\[\W*F\W?L\W*\]' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % ( search_string, self._categories_string(mode)) html = self.get_url(search_url, timeout=90) cnt = len(items[mode]) try: if not html or self._has_no_results( html) or 'width=750' not in html: raise generic.HaltParseException html = re.sub(r'</td>([^<]*)<tr>', '</td></tr>\1<tr>', html) with BS4Parser(html, 'html.parser', attr='width=750') as soup: torrent_table = soup.find('table', attrs={'width': 750}) torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size' ] ] if self.freeleech and not tr.attrs.get('bgcolor').endswith('FF99') or \ self._peers_fail(mode, seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size)))
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0): results = [] items = {'Season': [], 'Episode': [], 'RSS': []} if self.proxy and self.proxy.isEnabled(): self.headers.update({'referer': self.proxy.getProxyURL()}) for mode in search_params.keys(): for search_string in search_params[mode]: if mode != 'RSS': searchURL = self.proxy._buildURL(self.searchurl % (urllib.quote(unidecode(search_string)))) else: searchURL = self.proxy._buildURL(self.url + 'tv/latest/') logger.log(u"Search string: " + searchURL, logger.DEBUG) data = self.getURL(searchURL) if not data: continue re_title_url = self.proxy._buildRE(self.re_title_url) #Extracting torrent information from data returned by searchURL match = re.compile(re_title_url, re.DOTALL).finditer(urllib.unquote(data)) for torrent in match: title = torrent.group('title').replace('_', '.') #Do not know why but SickBeard skip release with '_' in name url = torrent.group('url') id = int(torrent.group('id')) seeders = int(torrent.group('seeders')) leechers = int(torrent.group('leechers')) #Filter unseeded torrent if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech): continue #Accept Torrent only from Good People for every Episode Search if self.confirmed and re.search('(VIP|Trusted|Helper|Moderator)', torrent.group(0)) is None: logger.log(u"ThePirateBay Provider found result " + torrent.group( 'title') + " but that doesn't seem like a trusted result so I'm ignoring it", logger.DEBUG) continue #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent if mode == 'Season' and search_mode == 'sponly': ep_number = int(epcount / len(set(allPossibleShowNames(self.show)))) title = self._find_season_quality(title, id, ep_number) if not title or not url: continue item = title, url, id, seeders, leechers items[mode].append(item) #For each search mode sort all the items by seeders items[mode].sort(key=lambda tup: tup[3], reverse=True) results += items[mode] return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'show_id': r'"show\?id=(\d+)[^>]+>([^<]+)<\/a>', 'get': 'load_torrent' }.items()) search_types = sorted([x for x in search_params.items()], key=lambda tup: tup[0], reverse=True) maybe_only = search_types[0][0] show_detail = '_only' in maybe_only and search_params.pop( maybe_only)[0] or '' for mode in search_params.keys(): for search_string in search_params[mode]: if 'Cache' == mode: search_url = self.urls['browse'] html = self.get_url(search_url) if self.should_skip(): return results else: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_string = search_string.replace(show_detail, '').strip() search_url = self.urls['search'] % search_string html = self.get_url(search_url) if self.should_skip(): return results shows = rc['show_id'].findall(html) if any(shows): html = '' for show in set(shows): sid, title = show if title in unquote_plus(search_string): html and time.sleep(1.1) html += self.get_url(self.urls['show'] % sid) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as tbl: tbl_rows = tbl.tbody.find_all( 'tr') or tbl.table.find_all('tr') or [] if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[0:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row( tr) stats = cells[head['leech']].get_text().strip() seeders, leechers = [ (tryInt(x[0], 0), tryInt(x[1], 0)) for x in re.findall( r'(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0] ][0] if self._reject_item(seeders, leechers): continue sizes = [ (tryInt(x[0], x[0]), tryInt(x[1], False)) for x in re.findall( r'([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?', stats) if x[0] ][0] size = sizes[(0, 1)[1 < len(sizes)]] for element in [ x for x in cells[2].contents[::-1] if unicode(x).strip() ]: if 'NavigableString' in str( element.__class__): title = unicode(element).strip() break download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
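The `_peers_fail(...)` and `_reject_item(...)` guards used throughout these parsers are likewise defined elsewhere. A minimal sketch of the assumed check (skip rows whose seeder/leecher counts fall below the configured minimums, except during cache refreshes); the standalone `min_seed`/`min_leech` parameters are an illustration only, as the real helpers would read `self.minseed`/`self.minleech`:

def peers_fail(mode, seeders, leechers, min_seed=1, min_leech=0):
    # Assumed shape of the _peers_fail()/_reject_item() checks seen above:
    # cache refreshes keep every row, while real searches drop rows that do
    # not meet the minimum seeder/leecher thresholds.
    if 'Cache' == mode:
        return False
    return seeders < min_seed or leechers < min_leech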
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict( (k, re.compile('(?i)' + v)) for (k, v) in { 'get': 'info.php\?id', 'valid_cat': 'cat=(?:0|50[12])', 'filter': 'free', 'title': r'Download\s*([^\s]+).*', 'seeders': r'(^\d+)', 'leechers': r'(\d+)$' }.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance( search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['search'] % search_string html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find('table', 'listor') torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr, {'seed': r'(?:see/lee|seed)'}) seeders, leechers = 2 * [ cells[head['seed']].get_text().strip() ] seeders, leechers = [ tryInt(n) for n in [ rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0] ] ] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['valid_cat']) \ or (self.freeleech and not tr.find('img', src=rc['filter'])): continue info = tr.find('a', href=rc['get']) title = (rc['title'].sub( r'\1', info.attrs.get('title', '')) or info.get_text()).strip() size = cells[head['size']].get_text().strip() download_url = self._link(info['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (StandardError, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _doSearch(self, search_params, epcount=0, age=0):

    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    if not self._doLogin():
        return []

    for mode in search_params.keys():
        for search_string in search_params[mode]:

            if isinstance(search_string, unicode):
                search_string = unidecode(search_string)

            nonsceneSearchURL = None
            foreignSearchURL = None
            if mode == 'Season':
                searchURL = self.urls['archive'] % (search_string)
                data = [self.getURL(searchURL, headers=self.headers)]
            else:
                searchURL = self.urls['search'] % (search_string, self.categories)
                nonsceneSearchURL = self.urls['nonscene'] % (search_string)
                foreignSearchURL = self.urls['foreign'] % (search_string)
                data = [self.getURL(searchURL, headers=self.headers),
                        self.getURL(nonsceneSearchURL, headers=self.headers),
                        self.getURL(foreignSearchURL, headers=self.headers)]
                logger.log(u"Search string: " + nonsceneSearchURL, logger.DEBUG)
                logger.log(u"Search string: " + foreignSearchURL, logger.DEBUG)

            logger.log(u"Search string: " + searchURL, logger.DEBUG)

            if not data:
                continue

            try:
                for dataItem in data:
                    html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])

                    torrent_table = html.find('table', attrs={'id': 'torrents-table'})
                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                    # Continue only if at least one release is found
                    if len(torrent_rows) < 2:
                        if html.title:
                            source = self.name + " (" + html.title.string + ")"
                        else:
                            source = self.name
                        logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
                        continue

                    for result in torrent_table.find_all('tr')[1:]:
                        try:
                            link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
                            all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
                            # The Foreign section contains two links, the other sections just one
                            if self._isSection('Foreign', dataItem):
                                url = all_urls[1]
                            else:
                                url = all_urls[0]

                            title = link.string
                            # A truncated title needs the details page to recover the full release name
                            if re.search('\.\.\.', title):
                                details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
                                title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)

                            download_url = self.urls['download'] % url['href']
                            id = int(link['href'].replace('details?id=', ''))
                            seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
                            leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and (seeders == 0 or seeders < self.minseed or leechers < self.minleech):
                            continue

                        if not title or not download_url:
                            continue

                        item = title, download_url, id, seeders, leechers

                        if self._isSection('Non-Scene', dataItem):
                            logger.log(u"Found result: " + title + "(" + nonsceneSearchURL + ")", logger.DEBUG)
                        elif self._isSection('Foreign', dataItem):
                            logger.log(u"Found result: " + title + "(" + foreignSearchURL + ")", logger.DEBUG)
                        else:
                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)

                        items[mode].append(item)

            except Exception:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

        # For each search mode sort all the items by seeders
        items[mode].sort(key=lambda tup: tup[3], reverse=True)

        results += items[mode]

    return results
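# When a listing title is truncated ('...'), the provider above falls back to the
# details page and pulls the full name out of its <title> text with a
# lookbehind/lookahead pair. A minimal sketch of that pattern, assuming the page
# title ends with the quoted release name (the actual page layout is an assumption):
import re

page_title = 'Details for "Show.Name.S01E01.720p.HDTV.x264-GRP"'   # hypothetical
full_title = re.search('(?<=").+(?<!")', page_title).group(0)
assert full_title == 'Show.Name.S01E01.720p.HDTV.x264-GRP'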
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
        'info': 'detail', 'get': 'download', 'fl': 'free'}.items())
    for mode in search_params.keys():
        for search_string in search_params[mode]:
            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

            if 'Cache' != mode:
                kwargs = dict(post_data={
                    'keywords': search_string, 'do': 'quick_sort', 'page': '0', 'category': '0',
                    'search_type': 't_name', 'sort': 'added', 'order': 'desc', 'daysprune': '-1'})

            html = self.get_url(self.urls['search'], **kwargs)

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, 'html.parser') as soup:
                    torrent_table = soup.find('table', id='sortabletable')
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')
                    get_detail = True

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    for tr in torrent_rows[1:]:
                        try:
                            seeders, leechers, size = [tryInt(n, n) for n in [
                                tr.find_all('td')[x].get_text().strip() for x in (-3, -2, -5)]]
                            if self._peers_fail(mode, seeders, leechers) \
                                    or self.freeleech and None is tr.find_all('td')[1].find('img', title=rc['fl']):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = (tr.find('div', attrs={'class': 'tooltip-content'}).get_text()
                                     or info.get_text()).strip()
                            title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip(
                                '/').replace(self.urls['config_provider_home_uri'], '')
                        except Exception:
                            continue

                        if get_detail and title.endswith('...'):
                            try:
                                with BS4Parser(self.get_url('%s%s' % (
                                        self.urls['config_provider_home_uri'],
                                        info['href'].lstrip('/').replace(self.urls['config_provider_home_uri'], ''))),
                                        'html.parser') as soup_detail:
                                    title = soup_detail.find(
                                        'td', attrs={'colspan': '3', 'class': 'thead'}).get_text().strip()
                                    title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            except IndexError:
                                continue
                            except Exception:
                                get_detail = False

                        try:
                            has_series = re.findall('(?i)(.*?series[^\d]*?\d+)(.*)', title)
                            if has_series:
                                rc_xtras = re.compile('(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
                                has_special = rc_xtras.findall(has_series[0][1])
                                if has_special:
                                    title = has_series[0][0] + rc_xtras.sub(list(set(
                                        list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1])
                                title = re.sub('(?i)series', r'Season', title)

                            title_parts = re.findall(
                                '(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$',
                                title)
                            if len(title_parts):
                                new_parts = [tryInt(part, part.strip()) for part in title_parts[0]]
                                if not new_parts[1]:
                                    new_parts[1] = 1
                                new_parts[2] = ('E%02d', ' Pack %d')[mode in 'Season'] % new_parts[2]
                                title = '%s.S%02d%s.%s' % tuple(new_parts)

                            dated = re.findall(
                                '(?i)([\(\s]*)((?:\d\d\s)?[adfjmnos]\w{2,}\s+(?:19|20)\d\d)([\)\s]*)', title)
                            if dated:
                                title = title.replace(''.join(dated[0]), '%s%s%s' % (
                                    ('', ' ')[1 < len(dated[0][0])], parse(dated[0][1]).strftime('%Y-%m-%d'),
                                    ('', ' ')[1 < len(dated[0][2])]))
                                add_pad = re.findall('((?:19|20)\d\d\-\d\d\-\d\d)([\w\W])', title)
                                if len(add_pad) and add_pad[0][1] not in [' ', '.']:
                                    title = title.replace(''.join(add_pad[0]),
                                                          '%s %s' % (add_pad[0][0], add_pad[0][1]))

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
                        except Exception:
                            pass

            except generic.HaltParseException:
                pass
            except Exception:
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_search(mode, len(items[mode]) - cnt,
                             ('search string: ' + search_string.replace('%', ' '), self.name)['Cache' == mode])

            if mode in 'Season' and len(items[mode]):
                break

        self._sort_seeders(mode, items)

        results = list(set(results + items[mode]))

    return results
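# The title clean-up at the end of the provider above rewrites site-style
# 'Series N ... X of Y' names into scene-style SxxEyy names. A simplified sketch of
# the two main steps (the 'special/extra' and air-date handling are omitted, and
# tryInt here is a stand-in for the helper used above):
import re

def tryInt(value, default=0):
    # stand-in for the provider's tryInt helper
    try:
        return int(value)
    except (TypeError, ValueError):
        return default

mode = 'Episode'
title = 'Some Show Series 2 3 of 10 720p HDTV'   # hypothetical site title

# 'Series' becomes 'Season' when a series number is present
if re.findall(r'(?i)(.*?series[^\d]*?\d+)(.*)', title):
    title = re.sub('(?i)series', 'Season', title)

# 'Season <s> ... <n> of <total>' collapses to 'S<s>E<n>' (or a pack label in Season mode)
parts = re.findall(
    r'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
if parts:
    new_parts = [tryInt(p, p.strip()) for p in parts[0]]
    new_parts[1] = new_parts[1] or 1
    new_parts[2] = ('E%02d', ' Pack %d')[mode in 'Season'] % new_parts[2]
    title = '%s.S%02d%s.%s' % tuple(new_parts)

assert title == 'Some Show.S02E03.720p HDTV'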