def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        self.items = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        urls = []
        if 'tvshowtitle' in data:
            url = self.search2.format(urllib.quote(query))
        else:
            url = self.search.format(urllib.quote(query))
        url = urlparse.urljoin(self.base_link, url)
        urls.append(url)
        url2 = url + '/2/'
        urls.append(url2)
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self._get_items, url))
        [i.start() for i in threads]
        [i.join() for i in threads]

        threads2 = []
        for i in self.items:
            threads2.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads2]
        [i.join() for i in threads2]
        return self._sources
    except:
        source_utils.scraper_error('KICKASS2')
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        self.items = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.episode_title = data['title'] if 'tvshowtitle' in data else None
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        self.aliases = data['aliases']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        urls = []
        if 'tvshowtitle' in data:
            urls.append(self.tvsearch % (quote(query)))
        else:
            urls.append(self.moviesearch % (quote(query)))
        url2 = ''.join(urls).replace('/1/', '/2/')
        urls.append(url2)
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self._get_items, url))
        [i.start() for i in threads]
        [i.join() for i in threads]

        threads2 = []
        for i in self.items:
            threads2.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads2]
        [i.join() for i in threads2]
        return self._sources
    except:
        source_utils.scraper_error('1337X')
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))

        self.hostDict = hostDict + hostprDict
        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)

        threads = []
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        self.items = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        if 'tvshowtitle' in data:
            url = self.tvsearch.format(urllib.quote(query))
        else:
            url = self.moviesearch.format(urllib.quote(query))

        self._get_items(url)
        self.hostDict = hostDict + hostprDict

        threads = []
        for i in self.items:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        # switch to client.parseDOM() to rid import
        posts = dom_parser.parse_dom(r, 'div', {'class': 'eTitle'})
        posts = [dom_parser.parse_dom(i.content, 'a', req='href') for i in posts if i]
        posts = [(i[0].attrs['href'], re.sub('<.+?>', '', i[0].content)) for i in posts if i]
        posts = [[i[0], i[1]] for i in posts]

        threads = []
        for i in posts:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]

        alive = [x for x in threads if x.is_alive()]
        while alive:
            alive = [x for x in threads if x.is_alive()]
            time.sleep(0.1)
        return self._sources
    except:
        source_utils.scraper_error('ONLINESERIES')
        return self._sources
def sources_packs(self, url, hostDict, hostprDict, search_series=False, total_seasons=None, bypass_filter=False):
    self.sources = []
    try:
        self.search_series = search_series
        self.total_seasons = total_seasons
        self.bypass_filter = bypass_filter
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.imdb = data['imdb']
        self.year = data['year']
        self.season_x = data['season']
        self.season_xx = self.season_x.zfill(2)

        query = re.sub('[^A-Za-z0-9\s\.-]+', '', self.title)
        queries = [
            self.search_link.format(query[0].lower(), cleantitle.geturl(query + ' S%s' % self.season_xx)),
            self.search_link.format(query[0].lower(), cleantitle.geturl(query + ' Season %s' % self.season_x))
        ]
        if search_series:
            queries = [
                self.search_link.format(query[0].lower(), cleantitle.geturl(query + ' Season')),
                self.search_link.format(query[0].lower(), cleantitle.geturl(query + ' Complete'))
            ]

        threads = []
        for url in queries:
            link = urljoin(self.base_link, url)
            threads.append(workers.Thread(self.get_sources_packs, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('MAGNETDL')
        return self.sources
def get(self, netloc, ua, timeout):
    # spawn several workers to fetch the cookie concurrently
    threads = []
    for i in range(0, 15):
        threads.append(workers.Thread(self.get_cookie, netloc, ua, timeout))
    [i.start() for i in threads]
    # poll up to 30 seconds for any worker to set the cookie
    for i in range(0, 30):
        if self.cookie is not None:
            return self.cookie
        time.sleep(1)
def sources(self, url, hostDict, hostprDict):
    self.scraper = cfscrape.create_scraper()
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        self.hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        self.data = data
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        url = data['url']
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/75.0'}
        r = self.scraper.get(url, headers=self.headers).content
        if 'Nothing Found' in r:
            return self.sources

        posts = client.parseDOM(r, 'h2', attrs={'class': 'title'})
        posts = zip(client.parseDOM(posts, 'a', ret='title'), client.parseDOM(posts, 'a', ret='href'))
        if posts == []:
            return self.sources

        threads = []
        for item in posts:
            threads.append(workers.Thread(self.get_sources, item))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('TVDOWNLOADS')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        scraper = cfscrape.create_scraper()
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.episode_title = data['title'] if 'tvshowtitle' in data else None
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        urls = []
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        urls.append(url)
        # urls.append('%s%s' % (url, '&page=2'))  # next page seems broken right now
        # urls.append('%s%s' % (url, '&page=3'))
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        links = []
        for x in urls:
            r = scraper.get(x).content
            if not r:
                continue
            rows = client.parseDOM(r, 'tr', attrs={'class': 'tlr'})
            rows += client.parseDOM(r, 'tr', attrs={'class': 'tlz'})
            for item in rows:
                links.append(item)

        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('EXTRATORRENT')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    try:
        self.sources = []
        if url is None:
            return self.sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        # title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = data['year']
        hdlr2 = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ''
        imdb = data['imdb']

        url = self.search(title, hdlr)
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content

        if hdlr2 == '':
            r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0]
        else:
            r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0]

        r = dom_parser.parse_dom(r.content, 'a', req=['href'])
        r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href'])) for i in r if i and i.content != 'Watch']
        if hdlr2 != '':
            r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()]

        threads = []
        for i in r:
            threads.append(workers.Thread(self._get_sources, i[0], i[1]))
        [i.start() for i in threads]
        # [i.join() for i in threads]

        # joins are disabled above, so poll until every worker has finished
        alive = [x for x in threads if x.is_alive()]
        while alive:
            alive = [x for x in threads if x.is_alive()]
            time.sleep(0.1)
        return self.sources
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        return self.sources
def sources_packs(self, url, hostDict, hostprDict, search_series=False, total_seasons=None, bypass_filter=False):
    self.sources = []
    try:
        self.search_series = search_series
        self.total_seasons = total_seasons
        self.bypass_filter = bypass_filter
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.imdb = data['imdb']
        self.year = data['year']
        self.season_x = data['season']
        self.season_xx = self.season_x.zfill(2)

        category = '+category%3ATV'
        # query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', self.title)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', self.title)
        queries = [
            self.search_link % quote_plus(query + ' S%s' % self.season_xx),
            self.search_link % quote_plus(query + ' Season %s' % self.season_x)
        ]
        if self.search_series:
            queries = [
                self.search_link % quote_plus(query + ' Season'),
                self.search_link % quote_plus(query + ' Complete')
            ]

        threads = []
        for url in queries:
            link = urljoin(self.base_link, url) + str(category) + '&v=t&s=sz&sd=d'
            threads.append(workers.Thread(self.get_sources_packs, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('ZOOGLE')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        scraper = cfscrape.create_scraper()
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        urls = []
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        urls.append(url)
        urls.append(url.replace('/1/', '/2/'))
        urls.append(url.replace('/1/', '/3/'))
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        links = []
        for x in urls:
            r = scraper.get(x).content
            rows = client.parseDOM(r, 'tr', attrs={'class': 'tlz'})
            for item in rows:
                links.append(item)

        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('EXTRATORRENT')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = self.search_link % cleantitle.geturl(query)
        url = urlparse.urljoin(self.base_link, query)

        r = client.request(url)
        posts = dom_parser2.parse_dom(r, 'div', {'class': 'eTitle'})
        posts = [dom_parser2.parse_dom(i.content, 'a', req='href') for i in posts if i]
        posts = [(i[0].attrs['href'], re.sub('<.+?>', '', i[0].content)) for i in posts if i]
        posts = [(i[0], i[1]) for i in posts if (cleantitle.get_simple(i[1].split(hdlr)[0]) == cleantitle.get(title) and hdlr.lower() in i[1].lower())]

        self.hostDict = hostDict + hostprDict
        threads = []
        for i in posts:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]

        alive = [x for x in threads if x.is_alive()]
        while alive:
            alive = [x for x in threads if x.is_alive()]
            time.sleep(0.1)
        return self._sources
    except Exception:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.episode_title = data['title'] if 'tvshowtitle' in data else None
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = client.request(url, timeout='5')
            if r is None:
                return self.sources
            links = re.findall('<a href=(/torrent/.+?)>', r, re.DOTALL)

            threads = []
            for link in links:
                threads.append(workers.Thread(self.get_sources, link))
            [i.start() for i in threads]
            [i.join() for i in threads]
            return self.sources
        except:
            source_utils.scraper_error('TORLOCK')
            return self.sources
    except:
        source_utils.scraper_error('TORLOCK')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self._sources = []
    try:
        self.scraper = cfscrape.create_scraper()
        if url is None:
            return self._sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02d' % (int(data['season'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = self.scraper.get(url).content
        posts = client.parseDOM(r, 'figure')

        items = []
        for post in posts:
            try:
                url = client.parseDOM(post, 'a', ret='href')[0]
                name = client.parseDOM(post, 'img', ret='title')[0].replace(' ', '.')
                if source_utils.remove_lang(name):
                    continue
                match = source_utils.check_title(title, name, hdlr, data['year'])
                if not match:
                    continue
                items.append((url, name))
            except:
                source_utils.scraper_error('MKVHUB')

        threads = []
        for i in items:
            threads.append(workers.Thread(self._get_sources, i[0], i[1], hostDict, hostprDict))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('MKVHUB')
        return self._sources
def sources(self, url, hostDict, hostprDict):
    # startTime = time.time()
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = client.request(url)
            links = re.findall('<a href=(/torrent/.+?)>', r, re.DOTALL)

            threads = []
            for link in links:
                threads.append(workers.Thread(self.get_sources, link))
            [i.start() for i in threads]
            [i.join() for i in threads]
            # endTime = time.time()
            # log_utils.log('TORLOCK scrape time = %s' % str(endTime - startTime), __name__, log_utils.LOGDEBUG)
            return self.sources
        except:
            source_utils.scraper_error('TORLOCK')
            return self.sources
    except:
        source_utils.scraper_error('TORLOCK')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = client.request(url)
            links = client.parseDOM(r, "td", attrs={"nowrap": "nowrap"})

            threads = []
            for link in links:
                threads.append(workers.Thread(self.get_sources, link))
            [i.start() for i in threads]
            [i.join() for i in threads]
            return self.sources
        except:
            source_utils.scraper_error('ETTV')
            return self.sources
    except:
        source_utils.scraper_error('ETTV')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)

        threads = []
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('TORRENTDOWNLOADS')
        return self._sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.episode_title = data['title'] if 'tvshowtitle' in data else None
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        urls = []
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        urls.append(url)
        urls.append(url + '&skip=20')
        urls.append(url + '&skip=40')
        urls.append(url + '&skip=60')
        urls.append(url + '&skip=80')
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self.get_sources, url))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('SOLIDTORRENTS')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = client.request(url)
            div = client.parseDOM(r, 'div', attrs={'class': 'panel panel-default'})[0]
            table = client.parseDOM(div, 'table', attrs={'class': 'table table-striped table-bordered table-hover table-condensed'})[0]
            links = re.findall('<a href="(.+?)">', table, re.DOTALL)
            # log_utils.log('links = %s' % links, log_utils.LOGDEBUG)

            threads = []
            for link in links:
                threads.append(workers.Thread(self.get_sources, link))
            [i.start() for i in threads]
            [i.join() for i in threads]
            return self.sources
        except:
            source_utils.scraper_error('TORLOCK')
            return self.sources
    except:
        source_utils.scraper_error('TORLOCK')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'].lower() if 'tvshowtitle' in data else data['title'].lower()
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 's%02de%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url).replace('+', '-')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url, timeout='5')
        if r is None:
            return self.sources
        links = re.findall('<a href="(/torrent/.+?)"', r, re.DOTALL)

        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('YOURBITTORRENT')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        category = '+category%3ATV' if 'tvshowtitle' in data else '+category%3AMovies'
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        urls = []
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url) + str(category) + '&v=t&s=sz&sd=d'
        urls.append(url)
        urls.append(url.replace('pg=1', 'pg=2'))
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self._get_sources, url))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('ZOOGLE')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        urls = []
        # url = self.search_link % quote_plus(query)
        url = self.search_link % quote(query + ' -soundtrack')
        url = urljoin(self.base_link, url)
        urls.append(url)
        urls.append(url + '&page=2')
        # log_utils.log('urls = %s' % urls, __name__, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self._get_sources, url))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('BTDB')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = cleantitle.geturl(query)
        url = urlparse.urljoin(self.base_link, query)

        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = dom_parser2.parse_dom(r, 'li', {'class': re.compile('.+?'), 'id': re.compile('comment-.+?')})

        self.hostDict = hostDict + hostprDict
        threads = []
        for i in posts:
            threads.append(workers.Thread(self._get_sources, i.content))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except Exception:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']

        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        urls = []
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        urls.append(url)
        # urls.append(url.replace('page=0', 'page=40'))  # server response time WAY to slow to parse 2 pages deep, site sucks.
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        threads = []
        for url in urls:
            threads.append(workers.Thread(self._get_sources, url))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('ISOHUNT2')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        posts = client.parseDOM(r, 'figure')

        items = []
        for post in posts:
            try:
                tit = client.parseDOM(post, 'img', ret='title')[0]
                t = tit.split(hdlr)[0].replace('(', '')
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                if hdlr not in tit:
                    raise Exception()
                url = client.parseDOM(post, 'a', ret='href')[0]
                try:
                    # normalize the reported size to GB; values given in MB are divided by 1024
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
                    div = 1 if size.endswith(('GB', 'GiB', 'Gb')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                    size = '%.2f GB' % size
                except:
                    size = '0'
                items += [(tit, url, size)]
            except:
                pass

        datos = []
        for title, url, size in items:
            try:
                name = client.replaceHTMLCodes(title)
                quality, info = source_utils.get_release_quality(name, name)
                info.append(size)
                info = ' | '.join(info)
                datos.append((url, quality, info))
            except:
                pass

        threads = []
        for i in datos:
            threads.append(workers.Thread(self._get_sources, i[0], i[1], i[2], hostDict, hostprDict))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.hostDict = hostprDict + hostDict

        items = []
        urls = []
        posts = []
        links = []

        if 'tvshowtitle' not in data:
            url = urlparse.urljoin(self.base_link, self.search_link % data['imdb'])
            r = client.request(url, headers={'User-Agent': client.agent()})
            posts = client.parseDOM(r, 'item')
        else:
            url = urlparse.urljoin(self.base_link, self.search_link % (cleantitle.geturl(self.title).replace('-', '+') + '+' + self.hdlr))
            r = client.request(url, headers={'User-Agent': client.agent()})
            posts = client.parseDOM(r, 'item')
        if not posts:
            return self._sources

        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                u = client.parseDOM(post, 'link')[0]
                s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)
                s = s.groups()[0] if s else '0'
                items += [(t, u, s)]
            except BaseException:
                pass

        items = set(items)  # de-duplicate feed entries before spawning workers

        threads = []
        for i in items:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02d' % (int(data['season'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        posts = client.parseDOM(r, 'figure')

        items = []
        for post in posts:
            try:
                tit = client.parseDOM(post, 'img', ret='title')[0]
                tit = client.replaceHTMLCodes(tit)
                t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in tit:
                    continue
                url = client.parseDOM(post, 'a', ret='href')[0]
                items.append((url, tit))
            except:
                source_utils.scraper_error('MKVHUB')

        threads = []
        for i in items:
            threads.append(workers.Thread(self._get_sources, i[0], i[1], hostDict, hostprDict))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('MKVHUB')
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = self.scraper.get(url).content
        posts = client.parseDOM(r, 'div', attrs={'class': 'item'})
        hostDict = hostprDict + hostDict

        items = []
        for post in posts:
            try:
                tit = client.parseDOM(post, 'img', ret='alt')[0]
                c = client.parseDOM(post, 'a', ret='href')[0]
                name = tit
                name = client.replaceHTMLCodes(name)
                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                try:
                    y = re.findall('(?:\.|\(|\[|\s*|)(S\d+E\d+|S\d+)(?:\.|\)|\]|\s*|)', name, re.I)[-1].upper()
                except Exception:
                    y = re.findall('(?:\.|\(|\[|\s*|)(\d{4})(?:\.|\)|\]|\s*|)', name, re.I)[0].upper()
                if not y == hdlr:
                    raise Exception()
                try:
                    s = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', t)[0]
                except BaseException:
                    s = '0'
                items += [(tit, c, s)]
            except Exception:
                pass

        threads = []
        for item in items:
            threads.append(workers.Thread(self._get_sources, item, hostDict))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except Exception:
        return self._sources