def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    last_recent_search = self.last_recent_search
    last_recent_search = '' if not last_recent_search else last_recent_search.replace('id-', '')

    for mode in search_params:
        urls = []
        for search_string in search_params[mode]:
            urls += [[]]
            search_string = unidecode(search_string)
            search_url = self.urls['search'] % (
                self._categories_string(), '+'.join(search_string.replace('.', ' ').split()),
                ('', '&freeleech=on')[self.freeleech])
            for page in range((3, 5)['Cache' == mode])[:-1]:
                urls[-1] += [search_url + '&page=%s' % page]

        results += self._search_urls(mode, last_recent_search, urls)
        last_recent_search = ''

    return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    last_recent_search = self.last_recent_search
    last_recent_search = '' if not last_recent_search else last_recent_search.replace('id-', '')

    for mode in search_params:
        urls = []
        for search_string in search_params[mode]:
            urls += [[]]
            search_string = unidecode(search_string) or search_string
            for page in range((3, 5)['Cache' == mode])[1:]:
                # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                urls[-1] += [self.urls['search'] % (
                    self._categories_string(mode, '%s', ';'), search_string,
                    (';free', '')[not self.freeleech], (';o=seeders', '')['Cache' == mode], page)]

        results += self._search_urls(mode, last_recent_search, urls)
        last_recent_search = ''

    return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    last_recent_search = self.last_recent_search
    last_recent_search = '' if not last_recent_search else last_recent_search.replace('id-', '')

    for mode in search_params:
        urls = []
        for search_string in search_params[mode]:
            urls += [[]]
            for page in range((3, 5)['Cache' == mode])[1:]:
                urls[-1] += [self.urls[('search', 'browse')['Cache' == mode]] % {
                    'cats': self._categories_string(mode, '', ','),
                    'query': unidecode(search_string) or search_string,
                    'x': '%spage/%s' % (('facets/tags:FREELEECH/', '')[not self.freeleech], page)}]

        results += self._search_urls(mode, last_recent_search, urls)
        last_recent_search = ''

    return results
def _search_provider(self, search_params, **kwargs):

    self._authorised()
    results = []

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    url = self.urls['browse'] % self.passkey
    for mode in search_params:
        for search_string in search_params[mode]:
            search_string = unidecode(search_string)
            search_url = url + (self.urls['search'] % search_string, '')['Cache' == mode]

            xml_data = self.cache.get_rss(search_url)

            cnt = len(items[mode])
            if xml_data and 'entries' in xml_data:
                for entry in xml_data['entries']:
                    try:
                        if entry['title'] and 'download' in entry['link']:
                            items[mode].append((entry['title'], entry['link'], None, None))
                    except KeyError:
                        continue

            self._log_search(mode, len(items[mode]) - cnt, search_url)

        results = list(set(results + items[mode]))

    return results
def _search_provider(self, search_params, **kwargs):

    results = []

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
        'seed': r'seed[^\d/]+([\d]+)', 'leech': r'leech[^\d/]+([\d]+)',
        'size': r'size[^\d/]+([^/]+)', 'get': '(.*download.*)', 'title': r'NUKED\b\.(.*)$'})])

    for mode in search_params:
        for search_string in search_params[mode]:
            search_string = unidecode(search_string)
            search_string = search_string.replace(' ', '.')

            search_url = self.urls['search'] % (
                self.api_key, self._categories_string(mode, template='%s', delimiter=','), search_string)

            resp = self.get_url(search_url)
            if self.should_skip():
                return results

            data = feedparser.parse(resp)
            tr = data and data.get('entries', []) or []

            cnt = len(items[mode])
            for item in tr:
                try:
                    seeders, leechers, size = [try_int(n, n) for n in [
                        rc[x].findall(item.summary)[0].strip() for x in ('seed', 'leech', 'size')]]
                    if self._reject_item(seeders, leechers):
                        continue
                    title = rc['title'].sub(r'\1', item.title.strip())
                    download_url = self._link(rc['get'].findall(getattr(item, 'link', ''))[0])
                except (BaseException, Exception):
                    continue

                if download_url and title:
                    items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            time.sleep(1.1)
            self._log_search(mode, len(items[mode]) - cnt, search_url)

        results = self._sort_seeding(mode, results + items[mode])

    return results
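
# Hedged illustration of the seed/leech/size patterns used above, applied to a
# hypothetical RSS summary string; the real feed wording may differ.
import re

_summary = 'Seeds: 12 / Leechers: 3 / Size: 1.40 GB'
_rc = {k: re.compile('(?i)' + v) for k, v in {
    'seed': r'seed[^\d/]+([\d]+)', 'leech': r'leech[^\d/]+([\d]+)', 'size': r'size[^\d/]+([^/]+)'}.items()}
_seed, _leech, _size = (_rc[x].findall(_summary)[0].strip() for x in ('seed', 'leech', 'size'))
print(_seed, _leech, _size)  # -> 12 3 1.40 GB
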
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'nodots': r'[\.\s]+'})])
    for mode in search_params:
        for search_string in search_params[mode]:
            search_string = unidecode(search_string)
            search_url = self.urls['browse'] % (self.user_authkey, self.user_passkey)
            if 'Cache' != mode:
                search_url += self.urls['search'] % rc['nodots'].sub('+', search_string)

            data_json = self.get_url(search_url, parse_json=True)
            if self.should_skip():
                return results

            cnt = len(items[mode])
            try:
                for item in data_json.get('response', {}).get('results', []):
                    if self.freeleech and not item.get('isFreeleech'):
                        continue

                    seeders, leechers, group_name, torrent_id, size = [try_int(n, n) for n in [
                        item.get(x) for x in ['seeders', 'leechers', 'groupName', 'torrentId', 'size']]]
                    if self._reject_item(seeders, leechers):
                        continue

                    try:
                        title_parts = group_name.split('[')
                        maybe_res = re.findall(r'((?:72|108|216)0\w)', title_parts[1])
                        maybe_ext = re.findall('(?i)(%s)' % '|'.join(common.mediaExtensions), title_parts[1])
                        detail = title_parts[1].split('/')
                        detail[1] = detail[1].strip().lower().replace('mkv', 'x264')
                        title = '%s.%s' % (BS4Parser(title_parts[0].strip()).soup.string, '.'.join(
                            (maybe_res and [maybe_res[0]] or []) +
                            [detail[0].strip(), detail[1], maybe_ext and maybe_ext[0].lower() or 'mkv']))
                    except (IndexError, KeyError):
                        title = self.regulate_title(item, group_name)
                    download_url = self.urls['get'] % (self.user_authkey, self.user_passkey, torrent_id)

                    if title and download_url:
                        items[mode].append((title, download_url, seeders, self._bytesizer(size)))
            except (BaseException, Exception):
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_search(mode, len(items[mode]) - cnt, search_url)

        results = self._sort_seeding(mode, results + items[mode])

    return results
def clean_show_name(showname):
    """
    :param showname: show name
    :type showname: AnyStr
    :return: show name with any trailing year (e.g. "(2019)") removed
    :rtype: AnyStr
    """
    if not PY2:
        return re.sub(r'[(\s]*(?:19|20)\d\d[)\s]*$', '', showname)
    return re.sub(r'[(\s]*(?:19|20)\d\d[)\s]*$', '', unidecode(showname))
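
# Hedged usage sketch for clean_show_name(): the regex strips a trailing bare or
# bracketed year (1900-2099). Sample names below are illustrative, not from the source.
import re

for _name in ('Show Name (2019)', 'Show Name 1987', 'Show Name'):
    print(re.sub(r'[(\s]*(?:19|20)\d\d[)\s]*$', '', _name))
# -> Show Name / Show Name / Show Name
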
def logged_in(self, y):

    if all([None is y or 'logout' in y,
            bool(filter_list(lambda c: 'remember_web_' in c, iterkeys(self.session.cookies)))]):
        if None is not y:
            self.shows = dict(re.findall(r'<option value="(\d+)">(.*?)</option>', y))
            for k, v in iteritems(self.shows):
                self.shows[k] = sanitize_scene_name(html_unescape(unidecode(decode_str(v))))
        return True
    return False
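
# Hedged illustration (markup assumed): how the option-tag regex in logged_in()
# maps a show-picker <select> into an {id: name} dict before sanitising the names.
import re

_sample = '<option value="87">Some Show (2019)</option><option value="92">Another Show</option>'
print(dict(re.findall(r'<option value="(\d+)">(.*?)</option>', _sample)))
# -> {'87': 'Some Show (2019)', '92': 'Another Show'}
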
def _search_provider(self, search_params, **kwargs):

    results = []

    self.session.headers['Cache-Control'] = 'max-age=0'

    last_recent_search = self.last_recent_search
    last_recent_search = '' if not last_recent_search else last_recent_search.replace('id-', '')

    for mode in search_params:
        urls = []
        for search_string in search_params[mode]:
            urls += [[]]
            search_string = unidecode(search_string)
            search_string = search_string if 'Cache' == mode else search_string.replace('.', ' ')
            for page in range((3, 5)['Cache' == mode])[1:]:
                urls[-1] += [self.urls['search'] % (search_string, page)]

        results += self._search_urls(mode, last_recent_search, urls)
        last_recent_search = ''

    return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    for mode in search_params:
        for search_string in search_params[mode]:
            search_string = unidecode(search_string)
            search_url = self.urls['search'] % search_string

            data_json, sess = self.get_url(
                search_url, headers=dict(Authorization='Bearer %s' % self._token),
                resp_sess=True, parse_json=True)
            if self.should_skip():
                return results

            cnt = len(items[mode])
            if isinstance(data_json, dict):
                for tr in data_json.get('torrents') or data_json.get('releases') or []:
                    seeders, leechers, size = (try_int(n, n) for n in [
                        tr.get(x) for x in ('seeders', 'leechers', 'size')])
                    if not self._reject_item(seeders, leechers):
                        title = tr.get('releaseName')
                        download_id = tr.get('id') or tr.get('shortId')
                        download_url = download_id and self.urls.get('get') % (download_id, self._dkey)
                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))
            elif 200 != getattr(sess, 'response', {}).get('status_code', 0):
                logger.log('The site search is not working, skipping')
                break

            self._log_search(mode, len(items[mode]) - cnt, search_url)

        results = self._sort_seeding(mode, results + items[mode])

    return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'get': 'magnet'})]) urls = [] for mode in search_params: for search_string in search_params[mode]: if 'Cache' == mode: search_url = self.urls['browse'] else: search_string = unidecode(search_string) show_name = filter_list( lambda x: x.lower() == re.sub(r'\s.*', '', search_string.lower()), list_values(self.shows)) if not show_name: continue search_url = self.urls['search'] % list_keys( self.shows)[list_values(self.shows).index( show_name[0])] if search_url in urls: continue urls += [search_url] html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as soup: tbl_rows = soup.select('ul.user-timeline > li') if not len(tbl_rows): raise generic.HaltParseException for tr in tbl_rows: try: anchor = tr.find('a', href=rc['get']) title = self.regulate_title(anchor) download_url = self._link(anchor['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, None, None)) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'detail', 'get': r'download\.' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ( search_string, self._categories_string(mode, '%s', ',')) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = '<table%s' % re.split(r'</table>\s*<table', html)[-1] html = re.sub(r'</td>([^<]*)<tr', r'</td></tr>\1<tr', html) with BS4Parser(html, parse_only='table') as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item( seeders, leechers, self.freeleech and (not tr.attrs.get( 'bgcolor', '').upper().endswith('FF99'))): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'view', 'get': 'download', 'nodots': r'[\.\s]+' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % (self._categories_string( mode, 'filter_cat[%s]=1'), rc['nodots'].sub( '+', search_string)) html = self.get_url(search_url) cnt = len(items[mode]) try: if not html or self._has_no_results( html) or 'Translation: No search results' in html: raise generic.HaltParseException with BS4Parser(html, features=['html5lib', 'permissive']) as soup: torrent_table = soup.find(id='torrent_table') torrent_rows = [] if not torrent_table else torrent_table.find_all( 'tr') if 2 > len(torrent_rows): raise generic.HaltParseException head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item( seeders, leechers, self.freeleech and (not bool( re.search( r'(?i)>\s*Freeleech!*\s*<', cells[1].encode( formatter='minimal'))))): continue title = self.regulate_title( tr.find( 'a', title=rc['info']).get_text().strip()) download_url = self._link( tr.find('a', title=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self.url: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['browse'] if 'Cache' == mode \ else self.urls['search'] % (quote_plus(search_string)) html = self.get_url(search_url, provider=self) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, parse_only=dict( table={ 'class': (lambda at: at and bool( re.search(r'table[23\d]*', at))) })) as tbl: tbl_rows = [] if not tbl else tbl.select('tr') for x, tr in enumerate(tbl_rows): row_text = tr.get_text().lower() if not ('torrent' in row_text and 'size' in row_text): tr.decompose() else: break if 5 < x: break tbl_rows = [] if not tbl else tbl.select('tr') if not len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n.replace(',', ''), n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item(seeders, leechers): continue anchors = tr.td.find_all('a') stats = anchors and [ len(a.get_text()) for a in anchors ] anchor = stats and anchors[stats.index( max(stats))] title = anchor and anchor.get_text().strip() download_url = anchor and self._link( anchor.get('href')) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': r'.*?details\s*-\s*', 'get': 'download' })]) log = '' if self.filter: non_marked = 'f0' in self.filter # if search_any, use unselected to exclude, else use selected to keep filters = ([f for f in self.may_filter if f in self.filter], [f for f in self.may_filter if f not in self.filter])[non_marked] filters += (( (all([x in filters for x in ('free', 'double')]) and ['freedouble'] or []) + (all([x in filters for x in ('half', 'double')]) and ['halfdouble'] or []) ), ((not all([x not in filters for x in ('free', 'double')]) and ['freedouble'] or []) + (not all([x not in filters for x in ('half', 'double')]) and ['halfdouble'] or [])))[non_marked] rc['filter'] = re.compile('(?i)^(%s)$' % '|'.join([ '%s' % f for f in filters if (f in self.may_filter and self.may_filter[f][1]) or f ])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join([ f in self.may_filter and self.may_filter[f][0] or f for f in filters ])) for mode in search_params: if mode in ['Season', 'Episode']: show_type = self.show_obj.air_by_date and 'Air By Date' \ or self.show_obj.is_sports and 'Sports' or self.show_obj.is_anime and 'Anime' or None if show_type: logger.log( u'Provider does not carry shows of type: [%s], skipping' % show_type, logger.DEBUG) return results for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ('+'.join( search_string.split()), self._categories_string(mode, '')) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser( html, parse_only=dict(table={ 'class': (lambda at: at and 'table' in at) })) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or (self.confirmed and tr.find( 'i', title=re.compile('(?i)unverified'))): continue if any(self.filter): marked = ','.join([ x.attrs.get('title', '').lower() for x in tr.find_all( 'i', attrs={ 'class': [ 'fa-star', 'fa-diamond', 'fa-star-half-o' ] }) ]) munged = ''.join( filter_iter(marked.__contains__, ['free', 'half', 'double'])) # noinspection PyUnboundLocalVariable if ((non_marked and rc['filter'].search(munged)) or (not non_marked and not rc['filter'].search(munged))): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item(seeders, leechers): continue title = rc['info'].sub( '', tr.find('a', attrs={'title': rc['info']})['title']) download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'detail', 'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|'), 'get': 'download'})]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % (self._categories_string(), search_string) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, parse_only=dict(table={'id': 'torrentsTable'})) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]] if None is tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'show_id': r'"show\?id=(\d+)[^>]+>([^<]+)<\/a>', 'get': 'load_torrent' })]) search_types = sorted([x for x in iteritems(search_params)], key=lambda tup: tup[0], reverse=True) maybe_only = search_types[0][0] show_detail = '_only' in maybe_only and search_params.pop( maybe_only)[0] or '' for mode in search_params: for search_string in search_params[mode]: if 'Cache' == mode: search_url = self.urls['browse'] html = self.get_url(search_url) if self.should_skip(): return results else: search_string = unidecode(search_string) search_string = search_string.replace(show_detail, '').strip() search_url = self.urls['search'] % search_string html = self.get_url(search_url) if self.should_skip(): return results shows = rc['show_id'].findall(html) if any(shows): html = '' for show in set(shows): sid, title = show if title in unquote_plus(search_string): html and time.sleep(1.1) html += self.get_url(self.urls['show'] % sid) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as tbl: tbl_rows = tbl.tbody and tbl.tbody.find_all( 'tr') or tbl.table and tbl.table.find_all('tr') if 2 > len(tbl_rows or []): raise generic.HaltParseException head = None for tr in tbl_rows[0:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row( tr) stats = cells[head['leech']].get_text().strip() seeders, leechers = [ (try_int(x[0], 0), try_int(x[1], 0)) for x in re.findall( r'(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0] ][0] if self._reject_item(seeders, leechers): continue sizes = [ (try_int(x[0], x[0]), try_int(x[1], False)) for x in re.findall( r'([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?', stats) if x[0] ][0] size = sizes[(0, 1)[1 < len(sizes)]] for element in [ x for x in cells[2].contents[::-1] if text_type(x).strip() ]: if 'NavigableString' in str( element.__class__): title = text_type(element).strip() break download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'info': 'details', 'get': 'download'})]) log = '' if self.filter: non_marked = 'f0' in self.filter # if search_any, use unselected to exclude, else use selected to keep filters = ([f for f in self.may_filter if f in self.filter], [f for f in self.may_filter if f not in self.filter])[non_marked] rc['filter'] = re.compile('(?i)(%s).png' % '|'.join( [f.replace('f', '') for f in filters if self.may_filter[f][1]])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join([self.may_filter[f][0] for f in filters])) for mode in search_params: rc['cats'] = re.compile('(?i)category=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ( search_string, self._categories_string(mode, template='category[]=%s') .replace('&category[]=4489', ('&genre[]=Animation', '')[mode in ['Cache', 'Propers']])) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = re.sub(r'(?ims)<div[^>]+display:\s*none;.*?</div>', '', html) html = re.sub('(?im)href=([^\\"][^>]+)>', r'href="\1">', html) html = (html.replace('"/></td>', '" /></a></td>') .replace('"title="', '" title="') .replace('</u></span></a></td>', '</u></a></span></td>')) html = re.sub('(?im)<b([mtwfs][^>]+)', r'<b>\1</b', html) with BS4Parser(html, attr='width="100%"') as soup: tbl_rows = [tr for tr in ([] if not soup else soup.find_all('tr')) if tr.find('a', href=rc['info'])] if not len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows: cells = tr.find_all('td') # noinspection PyUnboundLocalVariable if (6 > len(cells) or any(self.filter) and ((non_marked and tr.find('img', src=rc['filter'])) or (not non_marked and not tr.find('img', src=rc['filter'])))): continue try: head = head if None is not head else self._header_row(tr) seeders, leechers, size = [try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]] if not tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers): continue title = tr.find('a', href=rc['info']).get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'details', 'get': 'download' })]) for mode in search_params: rc['cats'] = re.compile( '(?i)cat=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: search_string = unidecode(search_string) html = self.get_url( self.urls['search'] % ('+'.join(search_string.split()), self._categories_string(mode, template='cats[]=%s'))) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as soup: tbl_rows = soup.find_all('div', 'torrentrow') if not len(tbl_rows): raise generic.HaltParseException for tr in tbl_rows: cells = tr.select('span[style*="cell"]') if 6 > len(cells): continue try: seeders, leechers, size = [ try_int(n, n) for n in [ cells[x].get_text().strip() for x in (-3, -2, -5) ] ] if not tr.find( 'a', href=rc['cats']) or self._reject_item( seeders, leechers): continue title = tr.find( 'a', href=rc['info']).get_text().strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, self.session.response.get('url')) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'detail', 'get': 'download', 'fl': r'\(Freeleech\)' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ( search_string, self._categories_string(mode), ('3', '0')[not self.freeleech]) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace( '<table width=100% border=0 align=center cellpadding=0 cellspacing=0>', '') html = re.sub( r'(?s)(.*)(<table[^>]*?950[^>]*>.*)(</body>)', r'\1\3', html) html = re.sub(r'(?s)<table[^>]+font[^>]+>', '<table id="parse">', html) html = re.sub( r'(?s)(<td[^>]+>(?!<[ab]).*?)(?:(?:</[ab]>)+)', r'\1', html) html = re.sub(r'(?m)^</td></tr></table>', r'', html) with BS4Parser( html, parse_only=dict(table={'id': 'parse'})) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item(seeders, leechers): continue info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text().split()[0]).strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': r'/torrents?/(?P<tid>(?P<tid_num>\d{2,})[^"]*)', 'get': 'download' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(unquote_plus(search_string)) vals = [i for i in range(5, 16)] random.SystemRandom().shuffle(vals) attempts = html = soup = tbl = None fetch = 'failed fetch' for attempts, s in enumerate((0, vals[0], vals[5], vals[10])): time.sleep(s) html = self.get_url(self.urls['search'] % (search_string, self._token)) if self.should_skip(): return results if html: try: soup = BS4Parser(html).soup tbl = soup.find('table', class_='table') if tbl: fetch = 'data fetched' break except (BaseException, Exception): pass if attempts: logger.log('%s %s after %s attempts' % (mode, fetch, attempts + 1)) cnt = len(items[mode]) try: if not html or self._has_no_results(html) or not tbl: raise generic.HaltParseException tbl_rows = tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item( seeders, leechers, self.freeleech and (None is tr.find('i', class_='fa-star'))): continue title = tr.find( 'a', href=rc['info']).get_text().strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (BaseException, Exception): continue try: titles = self.regulate_title( title, mode, search_string) if download_url and titles: for title in titles: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except (BaseException, Exception): pass except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) if soup: soup.clear(True) del soup self._log_search(mode, len(items[mode]) - cnt, ('search string: ' + search_string.replace('%', '%%'), self.name)['Cache' == mode]) if mode in 'Season' and len(items[mode]): break results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if self.show_obj and not self.show_obj.is_anime: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'nodots': r'[\.\s]+'})]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['browse'] if 'Cache' == mode else \ self.urls['search'] % (rc['nodots'].sub(' ', search_string), str(time.time()).replace('.', '3')) data, html = 2 * [None] if 'Cache' == mode: data = self.cache.get_rss(search_url) else: html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if None is not data: for cur_item in data.get('entries', []): title, download_url = cur_item.get( 'title'), self._link(cur_item.get('link')) if title and download_url: items[mode].append( (title, download_url, '', '')) if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser('<html><body>%s</body></html>' % html) as soup: for link in soup.find_all('a'): try: variants = map_list( lambda t: t.get_text().replace( 'SD', '480p'), link.find_all('span', class_='badge')) map_consume( lambda t: t.decompose(), link.find_all('span') + link.find_all('div')) title = '[HorribleSubs] ' + re.sub( r'\s*\[HorribleSubs\]\s*', '', link.get_text()) download_url = self._link(link.get('href')) if title and download_url: items[mode] += map_list( lambda _v: ('%s [%s]' % (title, _v), '%s-%s' % (download_url, _v), '', ''), variants) except (AttributeError, TypeError, ValueError): continue except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'detail', 'get': 'download', 'fl': 'free' })]) for mode in search_params: save_url, restore = self._set_categories(mode) if self.should_skip(): return results for search_string in search_params[mode]: search_string = search_string.replace(u'£', '%') search_string = re.sub(r'[\s.]+', '%', search_string) search_string = unidecode(search_string) kwargs = dict( post_data={ 'keywords': search_string, 'do': 'quick_sort', 'page': '0', 'category': '0', 'search_type': 't_name', 'sort': 'added', 'order': 'desc', 'daysprune': '-1' }) html = self.get_url(self.urls['search'], **kwargs) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException parse_only = dict(table={ 'id': (lambda at: at and 'sortabletable' in at) }) with BS4Parser(html, parse_only=parse_only) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') get_detail = True if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item( seeders, leechers, self.freeleech and (None is cells[1].find( 'img', title=rc['fl']))): continue info = tr.find('a', href=rc['info']) title = (tr.find( 'div', class_='tooltip-content').get_text() or info.get_text()).strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] download_url = self._link( tr.find('a', href=rc['get'])['href']) except (BaseException, Exception): continue if get_detail and title.endswith('...'): try: with BS4Parser( self.get_url('%s%s' % ( self.urls[ 'config_provider_home_uri'], info['href'].lstrip('/'). replace( self.urls[ 'config_provider_home_uri'], '')))) as soup_detail: title = soup_detail.find( 'td', class_='thead', attrs={ 'colspan': '3' }).get_text().strip() title = re.findall( '(?m)(^[^\r\n]+)', title)[0] except IndexError: continue except (BaseException, Exception): get_detail = False title = self.regulate_title(title) if download_url and title: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, ('search string: ' + search_string.replace('%', '%%'), self.name)['Cache' == mode]) if mode in 'Season' and len(items[mode]): break if save_url: self.get_url(save_url, post_data=restore) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if self.show_obj and not self.show_obj.is_anime: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'view', 'get': '(?:torrent|magnet:)' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ( (0, 2)[self.confirmed], search_string.replace('.', ' ')) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException parse_only = dict(table={ 'class': (lambda at: at and 'torrent-list' in at) }) with BS4Parser(html, parse_only=parse_only) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if self._reject_item(seeders, leechers): continue title = tr.find( 'a', href=rc['info']).get_text().strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'abd': r'(\d{4}(?:[.]\d{2}){2})', 'peers': r'Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)', 'info': r'(\w+)[.]html', 'get': r'^magnet:' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_string = '+'.join(rc['abd'].sub(r'%22\1%22', search_string).split()) search_url = self.urls['search'] % ( search_string, self._categories_string(mode, '', ',') + ' %2Blang%3Aen', ('ns', 'dt')['Cache' == mode]) html = self.get_url(search_url) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException html = html.replace('</a> </i>', '</a>').replace( '"href=', '" href=').replace('"style', '" style') parse_only = dict(table={ 'class': (lambda at: at and 'table-torrents' in at) }) with BS4Parser(html, parse_only=parse_only) as tbl: tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: head = head if None is not head else self._header_row( tr, { 'peers': r'(?:zqf\-clou)', 'size': r'(?:zqf\-file)', 'down': r'(?:zqf\-down)' }) stats = rc['peers'].findall( (cells[head['peers']].find( class_='progress') or {}).get('title', '')) seeders, leechers = any(stats) and [ try_int(x) for x in stats[0] ] or (0, 0) if self._reject_item(seeders, leechers): continue for cell in (1, 0): info = cells[cell].find('a') if ''.join(re.findall(r'[a-z0-9]+', info.get_text().lower())) in \ re.sub(r'html\?.*', '', ''.join( re.findall(r'[a-z0-9?]+', info['href'].lower()))): break else: info = cells[1].find( 'a', href=rc['info']) or cells[0].find( 'a', href=rc['info']) title = info.get_text().strip() size = cells[head['size']].get_text().strip() download_url = cells[head['down']].find( 'a', href=rc['get'])['href'] except (AttributeError, TypeError, ValueError, IndexError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
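
# Hedged example of the 'abd' handling in the function above: an air-by-date term
# such as 2023.10.05 is wrapped in URL-encoded quotes before words are joined with '+'.
# The sample search string is illustrative only.
import re

_abd = re.compile(r'(?i)(\d{4}(?:[.]\d{2}){2})')
print('+'.join(_abd.sub(r'%22\1%22', 'Show Name 2023.10.05').split()))
# -> Show+Name+%222023.10.05%22
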
def _search_provider(self, search_params, **kwargs): results = [] if not self.url: return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} quote_fx = (lambda t: quote(t, safe='~()*!.\'')) for mode in search_params: for search_string in search_params[mode]: search_url = self.url cnt = len(items[mode]) try: for token in self._get_tokens(): if self.should_skip(): return results if not token: continue params = dict(token=token[0], ent=token[1]) if 'Cache' != mode: params.update( {'ss': quote_fx(unidecode(search_string))}) data_json = None vals = [i for i in range(3, 8)] random.SystemRandom().shuffle(vals) for x in vals[0], vals[2], vals[4]: time.sleep(x) params.update(dict(ts=self.ts())) search_url = self.urls[ ('search', 'browse')['Cache' == mode]] % params # decode json below as get resp will false -ve to 'nodata' when no search results html_json = self.get_url(search_url) if None is not html_json: data_json = json.loads(html_json) if data_json or 'Cache' != mode: break if self.should_skip(): return results for item in filter_iter( lambda di: re.match( '(?i).*?(tv|television)', di.get('type', '') or di.get( 'category', '')) and (not self.confirmed or di.get('trusted') or di.get( 'verified')), data_json or {}): seeders, leechers, size = map_list( lambda arg: try_int(*([ item.get(arg[0]) if None is not item.get( arg[0]) else item.get(arg[1]) ]) * 2), (('seeder', 'seed'), ('leecher', 'leech'), ('size', 'size'))) if self._reject_item(seeders, leechers): continue title = item.get('name') or item.get('title') download_url = item.get('magnet') or item.get( 'magnetLink') if not download_url: source = item.get('site') or item.get('source') link = self._link( item.get('url') or item.get('pageLink')) if not source or not link: continue download_url = self.urls['get'] % dict( token=token[0], src=quote_fx(source), url=b64encodestring(quote_fx(link)), ts='%(ts)s') if title and download_url: items[mode].append( (title, download_url, seeders, size)) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs): results = [] if not self._authorised(): return results items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'info': 'detail', 'get': 'download', 'nuked': 'nuke', 'filter': 'free' })]) for mode in search_params: for search_string in search_params[mode]: search_string = unidecode(search_string) search_url = self.urls['search'] % ( search_string, self._categories_string(mode, '%s', ',')) html = self.get_url(search_url, timeout=90) if self.should_skip(): return results cnt = len(items[mode]) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html, attr='cellpadding="5"') as soup: tbl = soup.find('table', class_='browse') tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: info = tr.find('a', href=rc['info']) head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ n for n in [ cells[head[x]].get_text().strip() for x in ('leech', 'leech', 'size') ] ] seeders, leechers, size = [ try_int(n, n) for n in list( re.findall(r'^(\d+)[^\d]+?(\d+)', leechers)[0]) + re.findall('^[^\n\t]+', size) ] if self._reject_item( seeders, leechers, self.freeleech and (not tr.find('a', class_=rc['filter'])), self.confirmed and (any([ tr.find('img', alt=rc['nuked']), tr.find('img', class_=rc['nuked']) ]))): continue title = (info.attrs.get('title') or info.get_text()).strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): continue if title and download_url: items[mode].append( (title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) results = self._sort_seeding(mode, results + items[mode]) return results
def _search_provider(self, search_params, **kwargs):

    results = []
    if not self._authorised():
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
        'info': 'torrents', 'get': '(.*?download)(?:_check)?(.*)'})])

    log = ''
    if self.filter:
        non_marked = 'f0' in self.filter
        # if search_any, use unselected to exclude, else use selected to keep
        filters = ([f for f in self.may_filter if f in self.filter],
                   [f for f in self.may_filter if f not in self.filter])[non_marked]
        filters += (((all([x in filters for x in ('free', 'double', 'feat')]) and ['freedoublefeat'] or [])
                     + (all([x in filters for x in ('free', 'double')]) and ['freedouble'] or [])
                     + (all([x in filters for x in ('feat', 'double')]) and ['featdouble'] or [])),
                    ((not all([x not in filters for x in ('free', 'double', 'feat')]) and ['freedoublefeat'] or [])
                     + (not all([x not in filters for x in ('free', 'double')]) and ['freedouble'] or [])
                     + (not all([x not in filters for x in ('feat', 'double')]) and ['featdouble'] or []))
                    )[non_marked]
        rc['filter'] = re.compile(r'(?i)^(%s)$' % '|'.join(
            ['%s' % f for f in filters if (f in self.may_filter and self.may_filter[f][1]) or f]))
        log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join(
            [f in self.may_filter and self.may_filter[f][0] or f for f in filters]))

    for mode in search_params:
        if mode in ['Season', 'Episode']:
            show_type = self.show_obj.air_by_date and 'Air By Date' \
                or self.show_obj.is_sports and 'Sports' or None
            if show_type:
                logger.log(u'Provider does not carry shows of type: [%s], skipping' % show_type, logger.DEBUG)
                return results

        for search_string in search_params[mode]:
            search_string = unidecode(search_string)
            search_url = self.urls['search'] % (
                self.token, search_string.replace('.', ' '), self._categories_string(template=''), '', '', '')

            resp = self.get_url(search_url)
            if self.should_skip():
                return results

            resp_json = None
            if None is not self.resp:
                try:
                    resp_json = json.loads(resp)
                except (BaseException, Exception):
                    pass

            cnt = len(items[mode])
            try:
                if not resp or (resp_json and not resp_json.get('rows')):
                    raise generic.HaltParseException

                html = '<html><body>%s</body></html>' % \
                       (resp if None is self.resp else
                        self.resp.replace('</tbody>', '%s</tbody>' % ''.join(resp_json.get('result', []))))

                with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'table' in at)})) as tbl:
                    tbl_rows = [] if not tbl else tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 5 > len(cells):
                            continue
                        if any(self.filter):
                            marked = ','.join([x.attrs.get('data-original-title', '').lower() for x in tr.find_all(
                                'i', attrs={'class': ['text-gold', 'fa-diamond', 'fa-certificate']})])
                            # noinspection PyTypeChecker
                            munged = ''.join(filter_iter(marked.__contains__, ['free', 'double', 'feat']))
                            # noinspection PyUnboundLocalVariable
                            if ((non_marked and rc['filter'].search(munged)) or
                                    (not non_marked and not rc['filter'].search(munged))):
                                continue
                        try:
                            head = head if None is not head else self._header_row(
                                tr, {'seed': r'circle-up', 'leech': r'circle-down', 'size': r'fa-file'})

                            seeders, leechers, size = [try_int(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                            if self._reject_item(seeders, leechers):
                                continue

                            title = tr.find('a', href=rc['info'])
                            title = title.get_text().strip() if None is self.resp else title['data-original-title']
                            download_url = self._link(''.join(
                                rc['get'].findall(tr.find('a', href=rc['get'])['href'])[0]))
                        except (AttributeError, TypeError, ValueError, IndexError):
                            continue

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except (BaseException, Exception):
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_search(mode, len(items[mode]) - cnt, log + search_url)

        results = self._sort_seeding(mode, results + items[mode])

    return results
def _search_provider(self, search_params, search_mode='eponly', epcount=0, **kwargs):

    results = []
    if not self.url:
        return results

    items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

    rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
        'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*',
        'verify': '(?:helper|moderator|trusted|vip)',
        'size': r'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'})])

    for mode in search_params:
        for search_string in search_params[mode]:
            search_string = unidecode(search_string)

            s_mode = 'browse' if 'Cache' == mode else 'search'
            for i in ('', '2'):
                search_url = self.urls['%s%s' % (s_mode, i)]
                if 'Cache' != mode:
                    search_url = search_url % quote(search_string)

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                if html and not self._has_no_results(html):
                    break

            cnt = len(items[mode])
            try:
                if not html or self._has_no_results(html):
                    self._url = None
                    raise generic.HaltParseException

                with BS4Parser(html, parse_only=dict(table={'id': 'searchResult'})) as tbl:
                    tbl_rows = [] if not tbl else tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl.find_all('tr')[1:]:
                        cells = tr.find_all('td')
                        if 3 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)

                            seeders, leechers = [try_int(cells[head[x]].get_text().strip())
                                                 for x in ('seed', 'leech')]
                            if self._reject_item(seeders, leechers):
                                continue

                            info = tr.find('a', title=rc['info'])
                            title = info.get_text().strip().replace('_', '.')
                            tid = rc['tid'].sub(r'\1', str(info['href']))

                            download_magnet = tr.find('a', title=rc['get'])['href']
                        except (AttributeError, TypeError, ValueError):
                            continue

                        if self.confirmed and not tr.find('img', title=rc['verify']):
                            logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG)
                            continue

                        # Check number video files = episode in season and
                        # find the real Quality for full season torrent analyzing files in torrent
                        if 'Season' == mode and 'sponly' == search_mode:
                            ep_number = int(epcount // len(set(show_name_helpers.allPossibleShowNames(self.show_obj))))
                            title = self._find_season_quality(title, tid, ep_number)

                        if title and download_magnet:
                            size = None
                            try:
                                size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0]
                            except (BaseException, Exception):
                                pass

                            items[mode].append((title, download_magnet, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except (BaseException, Exception):
                logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

            self._log_search(mode, len(items[mode]) - cnt, search_url)

        results = self._sort_seeding(mode, results + items[mode])

    return results
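
# Hedged demo of the 'size' pattern above against a typical description cell;
# the sample text format is assumed, not taken from the original source.
import re

_size_re = re.compile(r'(?i)size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)')
print(_size_re.findall('Uploaded 03-14 2015, Size 1.17 GiB, ULed by anon')[0])
# -> 1.17 GiB
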
def html(self, mode, search_string, results): if 'Content-Type' in self.session.headers: del (self.session.headers['Content-Type']) setattr( self.session, 'reserved', { 'headers': { 'Accept': 'text/html, application/xhtml+xml, */*', 'Accept-Language': 'en-GB', 'Cache-Control': 'no-cache', 'Referer': 'https://broadcasthe.net/login.php', 'User-Agent': self.ua } }) self.headers = None if self.auth_html or self._authorised_html(): del (self.session.reserved['headers']['Referer']) if 'Referer' in self.session.headers: del (self.session.headers['Referer']) self.auth_html = True search_string = unidecode(search_string) search_url = self.urls['search'] % (search_string, self._categories_string( mode, 'filter_cat[%s]=1')) html = self.get_url(search_url, use_tmr_limit=False) if self.should_skip(log_warning=False, use_tmr_limit=False): return results cnt = len(results) try: if not html or self._has_no_results(html): raise generic.HaltParseException with BS4Parser(html) as soup: tbl = soup.find(id='torrent_table') tbl_rows = [] if not tbl else tbl.find_all('tr') if 2 > len(tbl_rows): raise generic.HaltParseException rc = dict([ (k, re.compile('(?i)' + v)) for (k, v) in iteritems({ 'cats': r'(?i)cat\[(?:%s)\]' % self._categories_string( mode, template='', delimiter='|'), 'get': 'download' }) ]) head = None for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: head = head if None is not head else self._header_row( tr) seeders, leechers, size = [ try_int(n, n) for n in [ cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size') ] ] if not tr.find( 'a', href=rc['cats']) or self._reject_item( seeders, leechers, container=self.reject_m2ts and (re.search(r'(?i)\[.*?m2?ts.*?\]', tr.get_text('', strip=True)))): continue title = tr.select('td span[title]')[0].attrs.get( 'title').strip() download_url = self._link( tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): continue if title and download_url: results.append((title, download_url, seeders, self._bytesizer(size))) except generic.HaltParseException: pass except (BaseException, Exception): logger.log( u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(results) - cnt, search_url) results = self._sort_seeding(mode, results) return results