def _retrieveInv(self, query, required=None):
    timeout = self.timeoutTotal / 2
    timeoutThread = timeout - 2
    threads = []

    if required:
        indexes = self._indexes(required, self.inv_split)
        self.inv_count = 0
        self.inv_total = len(indexes)
        for i in indexes:
            threads.append(threading.Thread(target=self._fetchInv, args=(i, required, timeoutThread, True)))

    indexes = self._indexes(query, self.inv_split)
    for i in indexes:
        threads.append(threading.Thread(target=self._fetchInv, args=(i, query, timeoutThread, False)))

    [i.start() for i in threads]
    tools.Time.sleep(0.5)
    timer = tools.Time(start=True)
    while timer.elapsed() < timeout and any(i.is_alive() for i in threads):
        tools.Time.sleep(0.5)
    self.timeoutCurrent = timer.elapsed()

    # For short titles, like "V for Vendetta".
    # Do not search for the required keywords by default, since these often contain a bunch of links, causing the query to be extremely slow.
    if len(query) < self.thresholdWords:
        self.inv_items.extend(self.inv_required)
def _items(self, category, title, titles, year, season, episode, pack):
    try:
        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
        timer = tools.Time(start=True)

        items = offcloud.Core().items(category=category)

        try: self.mutex.acquire()
        except: pass

        threads = []
        for item in items:
            if item['status'] == offcloud.Core.StatusFinished: # Only finished downloads.
                id = item['id']
                if not id in self.ids:
                    meta = metadata.Metadata(name=item['name'], title=title, titles=titles, year=year, season=season, episode=episode, pack=pack)
                    if not meta.ignore(size=False):
                        self.ids.append(id)
                        if category == offcloud.Core.CategoryInstant:
                            self.items.append(item)
                        else:
                            threads.append(threading.Thread(target=self._item, args=(category, id, season, episode)))

        try: self.mutex.release()
        except: pass

        if len(threads) > 0:
            [thread.start() for thread in threads]
            while True:
                if timer.elapsed() > timerEnd: break
                if all([not thread.is_alive() for thread in threads]): break
                time.sleep(0.5)
    except:
        tools.Logger.error()
def _retrieveInx(self, query):
    timeout = self.timeoutTotal - self.timeoutCurrent
    timeoutThread = timeout - 2
    threads = []

    indexes = self._indexes(self.inv_items, self.inx_split)
    for i in indexes:
        threads.append(threading.Thread(target=self._fetchInx, args=(i, query, timeoutThread)))

    [i.start() for i in threads]
    tools.Time.sleep(0.5)
    timer = tools.Time(start=True)
    while timer.elapsed() < timeout and any(i.is_alive() for i in threads):
        tools.Time.sleep(0.5)
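# Both retrieval helpers above share the same "poll until deadline" join: start
# every worker thread, then sleep in half-second steps until either the timeout
# lapses or all threads have finished. A minimal standalone sketch of that
# pattern, using only the standard library instead of tools.Time (the helper
# name is hypothetical):
import threading
import time

def _joinWithDeadline(threads, timeout, interval=0.5):
    [thread.start() for thread in threads]
    started = time.time()
    while time.time() - started < timeout and any(thread.is_alive() for thread in threads):
        time.sleep(interval)
    return time.time() - started # Elapsed seconds, mirroring self.timeoutCurrent above.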
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        ignoreContains = None
        data = self._decode(url)

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                    title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
                    query = title
                    ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                else:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = urllib.quote_plus(query)
        if not self._query(query): return sources

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1 # Pages start at 1.
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start = True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = (self.base_link + self.search_link) % (query, page)
            html = BeautifulSoup(client.request(urlNew))

            htmlTable = html.find_all('table', class_ = 'search-table')[0]
            htmlRows = htmlTable.find_all('tr', recursive = False)

            page += 1
            added = False

            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td', recursive = False)

                # Name
                htmlName = htmlColumns[0].getText().strip()

                # Size
                htmlSize = htmlColumns[2].getText().strip()

                # Link
                htmlLink = htmlColumns[0].find_all('a')[0]['href'].strip()
                htmlLink = network.Container(htmlLink).torrentMagnet(title = title)

                # Seeds
                htmlSeeds = int(htmlColumns[3].getText().strip())

                # Metadata
                meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

                # Ignore
                meta.ignoreAdjust(contains = ignoreContains)
                if meta.ignore(True): continue

                # Add
                sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        ignoreContains = None
        data = self._decode(url)

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                    title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
                    query = title
                    ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                else:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        if not self._query(query): return sources
        query = urllib.quote_plus(query)

        category = self.category_shows if 'tvshowtitle' in data else self.category_movies
        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        '''
        # Old implementation: repair the broken table markup and let BeautifulSoup parse it.
        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (category, query, page)
            html = client.request(urlNew)

            # Demonoid does not have a closing tag for the rows.
            # This causes BeautifulSoup to only detect the first row.
            # Manually add a closing </tr> tag, except for the first row.
            html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '<tr align="left" bgcolor="">', 1)
            html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '</tr><tr align="left" bgcolor="#CCCCCC">')
            html = BeautifulSoup(html)

            page += 1
            added = False

            htmlTable = html.find_all('td', class_ = 'ctable_content_no_pad')[0].find_all('table', recursive = False)[1]
            htmlRows = html.find_all('tr')

            i = 0
            while i < len(htmlRows):
                try:
                    htmlRow = htmlRows[i]
                    i += 1 # Normal loop increment.
                    if len(htmlRow.find_all('td', {'rowspan' : '2'})) == 0: continue

                    # Name
                    htmlName = htmlRow.find_all('td', {'colspan' : '9'})[0].find_all('a')[0].getText().strip()

                    htmlRow = htmlRows[i]
                    i += 1 # Go to the next row, because items are split over two lines.

                    # Size
                    htmlSize = htmlColumns[3].getText().strip()

                    # Link
                    htmlLink = htmlColumns[2].find_all('a')[0]['href']

                    # Seeds
                    htmlSeeds = int(htmlColumns[6].getText().strip())

                    items = htmlColumns[0].find_all('a')

                    # Release
                    try:
                        htmlRelease = items[1].getText()
                        if not 'other' in htmlRelease.lower(): htmlName += ' ' + htmlRelease
                    except: pass

                    # Language
                    try: htmlLanguage = items[2].getText()
                    except: htmlLanguage = None

                    # Metadata
                    meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguage)

                    # Ignore
                    meta.ignoreAdjust(contains = ignoreContains)
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                    added = True
                except: pass
        '''

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (category, query, page)
            html = client.request(urlNew)

            page += 1
            added = False

            htmlRows = re.findall('<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">', html, re.M | re.S)
            htmlRows = ['<tr><td>' + i for i in htmlRows]

            for htmlRow in htmlRows:
                try:
                    htmlRow = BeautifulSoup(htmlRow)
                    htmlColumns = htmlRow.find_all('td')

                    # Name
                    htmlName = htmlRow.find_all('a')[1].getText().strip()

                    # Size
                    htmlSize = htmlColumns[4].getText().strip()

                    # Link
                    htmlLink = htmlRow.find_all('a')[1]['href']
                    htmlLink = urlparse.urljoin(self.base_link, htmlLink)
                    htmlLink = re.search('genidy=(.*)', htmlLink, re.IGNORECASE)
                    if not htmlLink: continue
                    htmlLink = self.download_link % htmlLink.group(1)

                    # Seeds
                    try: htmlSeeds = int(htmlColumns[7].getText().strip())
                    except: htmlSeeds = 0

                    items = htmlColumns[0].find_all('a')

                    # Metadata
                    meta = metadata.Metadata(name=htmlName, title=title, titles=titles, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                    # Ignore
                    meta.ignoreAdjust(contains=ignoreContains)
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': htmlName})
                    added = True
                except: pass

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
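# The live loop above recovers Demonoid's unterminated table rows with a regex
# instead of repairing the markup: it splits on the "tstart" marker comment so
# each fragment can be parsed on its own. A minimal sketch of that idea on a
# made-up snippet (the marker and row layout follow the code above; the sample
# HTML is hypothetical):
import re

def _splitRows(html):
    rows = re.findall('<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">', html, re.M | re.S)
    return ['<tr><td>' + row for row in rows] # Re-add the opening tags the regex consumed.

sample = ('<!-- tstart --><a href="/a">One</a></td>'
          '<tr align="left" bgcolor="#CCCCCC">'
          '<!-- tstart --><a href="/b">Two</a></td>'
          '<tr align="left" bgcolor="#CCCCCC">')
print(_splitRows(sample)) # Two self-contained row fragments.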
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        category = self.category_shows if 'tvshowtitle' in data else self.category_movies
        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (category, urllib.quote_plus(query), page)
            html = client.request(urlNew)

            # HTML is corrupt. Try to fix it manually.
            try:
                indexStart = html.find('class="table2"')
                indexStart = html.find('<tr bgcolor', indexStart)
                indexEnd = html.find('search_stat', indexStart)
                html = html[indexStart:indexEnd]
                indexEnd = html.rfind('</td>') + 5
                html = html[:indexEnd]
                html = html.replace('</a></td>', '</td>')
                html = '<table>' + html + '</tr></table>'
            except: pass

            html = BeautifulSoup(html)

            page += 1
            added = False

            htmlRows = html.find_all('tr')
            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td')
                htmlInfo = htmlColumns[0].find_all('div')[0]

                # Name
                htmlName = htmlInfo.find_all('a', recursive=False)[1].getText().strip()

                # Link
                htmlHash = htmlInfo.find_all('a', recursive=False)[0]['href']
                indexStart = htmlHash.find('torrent/')
                if indexStart < 0: continue
                indexStart += 8
                indexEnd = htmlHash.find('.torrent', indexStart)
                if indexEnd < 0: continue
                htmlHash = htmlHash[indexStart:indexEnd]
                if not tools.Hash.valid(htmlHash): continue
                htmlLink = network.Container(htmlHash).torrentMagnet(title=query)

                # Size
                htmlSize = htmlColumns[2].getText().strip()

                # Seeds
                htmlSeeds = int(htmlColumns[3].getText().replace(',', '').replace(' ', ''))

                # Metadata
                meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                # Ignore
                if meta.ignore(True): continue

                # Add
                sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': htmlName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
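# The parser above lifts the info hash out of a ".torrent" URL and hands it to
# network.Container to build a magnet link. A rough standalone sketch of the
# same conversion (the URL layout matches the parser above; the magnet format
# is the standard BitTorrent one, with the optional display-name parameter):
try: from urllib import quote_plus # Python 2
except ImportError: from urllib.parse import quote_plus # Python 3

def _magnetFromTorrentUrl(url, title=None):
    start = url.find('torrent/')
    if start < 0: return None
    start += 8
    end = url.find('.torrent', start)
    if end < 0: return None
    infoHash = url[start:end]
    magnet = 'magnet:?xt=urn:btih:' + infoHash
    if title: magnet += '&dn=' + quote_plus(title)
    return magnet

print(_magnetFromTorrentUrl('https://example.net/torrent/0123456789abcdef0123456789abcdef01234567.torrent', 'Example S01E01'))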
def sources(self, url, hostDict, hostprDict):
    self.tSources = []
    try:
        if url == None: raise Exception()

        ignoreContains = None
        data = self._decode(url)

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                    title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
                    query = title
                    ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                else:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = urllib.quote_plus(query)
        if not self._query(query): return self.tSources

        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0 # Pages start at 1, but the counter is incremented before the first request.

        timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
        timerEnd = timerTimeout - 8
        timer = tools.Time(start = True)

        threads = []
        self.tLock = threading.Lock()

        while True:
            try:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd: break

                added = False
                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit: break

                html = BeautifulSoup(client.request(url % (query, pageCounter)))

                htmlTable = html.find_all('table', class_ = 'results')
                htmlTable = htmlTable[len(htmlTable) - 1]
                htmlRows = htmlTable.find_all('tr')

                for i in range(1, len(htmlRows)):
                    try:
                        htmlRow = htmlRows[i]
                        htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.

                        # Name
                        htmlName = htmlColumns[0].find_all('a')[0].getText()

                        # Link
                        htmlLink = urlparse.urljoin(self.base_link, htmlColumns[0].find_all('a')[0]['href'])

                        # Size
                        htmlSize = htmlColumns[1].getText()

                        # Age
                        htmlAge = htmlColumns[3].getText()
                        htmlAge = int(convert.ConverterDuration(htmlAge).value(convert.ConverterDuration.UnitDay))

                        # Metadata
                        meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, age = htmlAge)

                        # Ignore
                        meta.ignoreAdjust(contains = ignoreContains, length = 0.3)
                        if meta.ignore(False): continue

                        # Add
                        self.tLock.acquire()
                        self.tSources.append({'url' : None, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                        self.tLock.release()
                        added = True

                        # Link
                        thread = threading.Thread(target = self._link, args = (htmlLink, len(self.tSources) - 1))
                        threads.append(thread)
                        thread.start()
                    except: pass

                if not added: break
            except: break

        # First filter out all non-related links before doing the hash lookup.
        timerTimeout -= 2
        while True:
            if timer.elapsed() > timerTimeout: break
            if not any([thread.is_alive() for thread in threads]): break
            tools.Time.sleep(0.5)

        try: self.tLock.release()
        except: pass
    except:
        try: self.tLock.release()
        except: pass
    return [i for i in self.tSources if i['url']]
def sources(self, url, hostDict, hostprDict):
    self.tSources = []
    try:
        if url == None: raise Exception()
        if not self.enabled or self.username == '' or self.password == '': raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        show = 'tvshowtitle' in data
        title = data['tvshowtitle'] if show else data['title']
        titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if show else '%s (%s)' % (data['title'], data['year'])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

        if show: subcategory = self.subcategories_show.values()[0] if len(self.subcategories_show) == 1 else self.subcategory_any
        else: subcategory = self.subcategories_movie.values()[0] if len(self.subcategories_movie) == 1 else self.subcategory_any

        if show:
            if pack: query = '%s S%02d' % (title, season)
            else: query = '%s S%02dE%02d' % (title, season, episode)
        else:
            query = '%s %d' % (title, year)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        querySplit = query.split()

        url = urlparse.urljoin(self.base_link, self.search_link)
        query = urllib.quote_plus(query)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 0
        added = False

        timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
        timerEnd = timerTimeout - 8
        timer = tools.Time(start = True)

        threads = []
        self.tLock = threading.Lock()

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (self.category_video, subcategory, query, page)
            html = BeautifulSoup(client.request(urlNew))

            page += 25
            added = False

            htmlTables = html.find_all('table', class_ = 'table')
            if htmlTables:
                htmlTable = htmlTables[0]
                htmlTbody = htmlTable.find_all('tbody')[0]
                htmlRows = htmlTbody.find_all('tr', recursive = False)

                for i in range(len(htmlRows)):
                    # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                    if timer.elapsed() > timerEnd: break

                    htmlRow = htmlRows[i]

                    # Name
                    htmlInfo = htmlRows[i].find_all('a', href = True)[1]
                    htmlName = htmlInfo.getText()

                    # Category
                    if subcategory is self.subcategory_any:
                        htmlCategory = htmlRow.find_all('div', class_ = 'hidden')[0].getText()
                        if show and len(self.subcategories_show) > 1:
                            if htmlCategory not in self.subcategories_show.keys(): continue
                        elif len(self.subcategories_movie) > 1:
                            if htmlCategory not in self.subcategories_movie.keys(): continue

                    # Size
                    htmlSize = re.sub('([mMkKgGtT]?)[oO]', '\\1b', htmlRow.find_all('td')[5].getText())

                    # Link
                    htmlLink = self.base_link + self.download_link + str(htmlInfo.get('href').encode('utf-8')).split('/')[-1].split('-')[0]

                    # Seeds
                    htmlSeeds = int(htmlRow.find_all('td')[7].getText())

                    # Metadata
                    meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

                    # Ignore
                    if meta.ignore(True): continue

                    # Add
                    self.tLock.acquire()
                    self.tSources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                    self.tLock.release()
                    added = True

                    # Hash
                    if self.inspection:
                        htmlHash = urllib.quote(str(htmlInfo.get('href').encode('utf-8')), ':/+')
                        thread = threading.Thread(target = self._hash, args = (htmlHash, len(self.tSources) - 1))
                        threads.append(thread)
                        thread.start()

            if not added: break # Last page reached with a working torrent.

        # First filter out all non-related links before doing the hash lookup.
        if self.inspection:
            timerTimeout -= 2
            while True:
                if timer.elapsed() > timerTimeout: break
                if not any([thread.is_alive() for thread in threads]): break
                tools.Time.sleep(0.3)

        try: self.tLock.release()
        except: pass
        return self.tSources
    except:
        tools.Logger.error()
        try: self.tLock.release()
        except: pass
        return self.tSources
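# The size column above is French ("Mo"/"Go", octets), which the regex rewrites
# to a "b" suffix so the downstream size parser recognises the unit. A small
# demonstration of that substitution (sample values are made up):
import re

for size in ['701 Mo', '1.4 Go', '12 o']:
    print(re.sub('([mMkKgGtT]?)[oO]', '\\1b', size)) # '701 Mb', '1.4 Gb', '12 b'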
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.search_link)
        category = self.category_shows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start = True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (category, urllib.quote_plus(query), page)
            html = BeautifulSoup(client.request(urlNew))

            page += 1
            added = False

            # NB: Do not use "tbody class=results", since the table has inner div/style that breaks parsing.
            htmlRows = html.find_all('tr', class_ = 'result') # Do not search further down the tree (just the direct children), because that will also retrieve the header row.
            for i in range(len(htmlRows)):
                try:
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td', recursive = False)

                    # Name
                    htmlName = htmlColumns[0].find_all('a')[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[1].getText().strip()

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlColumns[0].find_all('a')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]['href']
                        if link.startswith('magnet:'):
                            htmlLink = link
                            break

                    # Seeds
                    htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().strip()))

                    # Metadata
                    meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

                    # Ignore
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                    added = True
                except: pass

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        type = self.type_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.type_movies

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s (%s)' % (data['title'], data['year'])

        year = int(data['year']) if 'year' in data and not data['year'] == None else None
        season = int(data['season']) if 'season' in data and not data['season'] == None else None
        episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
        pack = data['pack'] if 'pack' in data else False

        if 'tvshowtitle' in data:
            if pack: query = '%s %d' % (title, season)
            else: query = '%s S%02dE%02d' % (title, season, episode)
        else:
            query = '%s %d' % (title, year)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.search_link)

        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            urlNew = url % (urllib.quote_plus(query), type, page)
            html = BeautifulSoup(client.request(urlNew))

            page += 1
            added = False

            htmlTable = html.find_all('div', id='div2child')[0]
            htmlRows = htmlTable.find_all('div', class_='resultdiv', recursive=False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlInfo = htmlRow.find_all('div', class_='resultdivbotton')[0]

                # Name
                htmlName = htmlRow.find_all('div', class_='resultdivtop')[0].find_all('div', class_='resultdivtopname')[0].getText().strip()

                # Size
                htmlSize = htmlInfo.find_all('div', class_='resultlength')[0].find_all('div', class_='resultdivbottonlength')[0].getText()

                # Link
                htmlHash = htmlInfo.find_all('div', class_='hideinfohash')[0].getText()
                htmlLink = network.Container(htmlHash).torrentMagnet(title=titleYear)

                # Seeds
                htmlSeeds = int(htmlInfo.find_all('div', class_='resultseed')[0].find_all('div', class_='resultdivbottonseed')[0].getText())

                # Metadata
                meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                # Ignore
                if meta.ignore(True): continue

                # Add
                sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'info': meta.information(), 'file': htmlName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.search_link)
        category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies
        url += category

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 0 # Pages start at 0.
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start = True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (urllib.quote_plus(query), page)
            html = client.request(urlNew)

            # Strip the first image tag and repair the custom markup before parsing.
            htmlLower = html.lower()
            start = htmlLower.index('<img')
            end = htmlLower.index('>', start) + 1
            html = html[:start] + html[end:]
            html = html.replace('</b></a><td', '</b></a></td><td')
            html = html.replace('<shn>', '').replace('</shn>', '')
            html = html.replace('<shnn>', '').replace('</shnn>', '')
            html = html.replace('<shn2>', '').replace('</shn2>', '')
            html = BeautifulSoup(html)

            page += 1
            added = False

            htmlRows = html.find_all('tr', class_ = 't-row') # Missing closing tags. Look for rows directly instead.
            for i in range(len(htmlRows)):
                try:
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('th')

                    # Name
                    htmlName = htmlRow.find_all('td', recursive = False)[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[2].getText().strip()

                    # Link
                    htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[1]['href'].strip()
                    htmlLink = urlparse.urljoin(self.base_link, htmlLink)

                    # Seeds
                    htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().replace(',', '').replace('.', '').strip()))

                    # Metadata
                    meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

                    # Ignore
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                    added = True
                except: pass

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    self.items = [] # NB: The same object of the provider is used for both normal episodes and season packs. Make sure it is cleared from the previous run.
    sources = []
    try:
        if url == None: raise Exception()

        core = premiumize.Core()
        if not core.accountValid(): raise Exception()

        data = self._decode(url)

        if 'exact' in data and data['exact']:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            year = None
            season = None
            episode = None
            pack = False
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False

        if not self._query(title, year, season, episode, pack): return sources

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
        timer = tools.Time(start=True)

        threads = []
        ids = []
        items = core._items()
        for item in items:
            id = item['id']
            if not id in ids: # The RSS feed directory returns the same episodes individually and as a pack. Only add it once.
                meta = metadata.Metadata(name=item['name'], title=title, titles=titles, year=year, season=season, episode=episode)
                if ((not pack and item['name'] == source.FeedsName) or not pack) and not meta.ignore(size=False):
                    if item['type'] == 'file':
                        item['video'] = item
                        self.items.append(item)
                    else:
                        threads.append(threading.Thread(target=self._item, args=(item['id'], None, season, episode)))

        [thread.start() for thread in threads]
        while True:
            if timer.elapsed() > timerEnd: break
            if all([not thread.is_alive() for thread in threads]): break
            time.sleep(0.5)

        try: self.mutex.acquire()
        except: pass
        items = self.items
        try: self.mutex.release()
        except: pass

        for item in items:
            try:
                jsonName = item['video']['name']
                try:
                    if not item['name'] == jsonName and not item['name'] == 'root':
                        jsonName = item['name'] + ' - ' + jsonName # Sometimes metadata, like quality, is only in the folder name, not the file name.
                except: pass
                jsonLink = item['video']['link']
                jsonSize = item['video']['size']['bytes']

                # RAR Files
                if jsonLink.lower().endswith('.rar'): continue

                # Metadata
                meta = metadata.Metadata(name=jsonName, title=title, titles=titles, year=year, season=season, episode=episode, size=jsonSize, pack=pack)

                # Add
                sources.append({'url': jsonLink, 'premium': True, 'debridonly': True, 'direct': True, 'memberonly': True, 'source': 'Premiumize', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': jsonName})
            except: pass

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s (%s)' % (data['title'], data['year'])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (query, page) # Do not plus/quote query.
            html = BeautifulSoup(client.request(urlNew))

            page += 1
            added = False

            htmlRows = html.find_all('div', class_='rs')
            for i in range(len(htmlRows)):
                try:
                    htmlRow = htmlRows[i]
                    htmlInfo = htmlRow.find_all('div', class_='sbar')[0]

                    # Name
                    htmlName = htmlRow.find_all('div', class_='title')[0].getText().strip()

                    # Size
                    htmlSize = htmlInfo.find_all('span')[3].find_all('b')[0].getText().strip()

                    # Link
                    htmlLink = None
                    for j in htmlInfo.find_all('a'):
                        if network.Container(j['href']).torrentIsMagnet():
                            htmlLink = j['href']
                            break

                    # Seeds
                    # No seeds available, so estimate with popularity.
                    try:
                        htmlSeeds = int(htmlInfo.find_all('span')[5].find_all('b')[0].getText().strip())
                        htmlSeeds /= 30000
                        htmlSeeds = max(1, htmlSeeds)
                    except:
                        htmlSeeds = 1

                    # Metadata
                    meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': htmlName})
                    added = True
                except: pass

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            type = None
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            type = 'tv' if 'tvshowtitle' in data else 'movie'
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = query.replace(' ', '-') # Uses - not + to separate words.
        query = query.lower() # Only lower case letters work.

        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            # Entries are alphabetically categorized according to their first letter.
            urlNew = url % (query[0], urllib.quote_plus(query), page)
            html = BeautifulSoup(client.request(urlNew))

            page += 1
            added = False

            htmlTable = html.find_all('table', class_='download')[0]
            htmlRows = htmlTable.find_all('tbody', recursive=False)[0].find_all('tr', recursive=False)

            for i in range(0, len(htmlRows)):
                try:
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td', recursive=False)

                    # Type
                    if type and not htmlColumns[3].getText().strip().lower() == type: continue

                    # Name
                    htmlName = htmlColumns[1].find_all('a', recursive=False)[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[5].getText().strip()

                    # Link
                    htmlLink = htmlColumns[0].find_all('a', recursive=False)[0]['href'].strip()

                    # Seeds
                    htmlSeeds = int(htmlColumns[6].getText().strip())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True): continue

                    # Add
                    sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': htmlName})
                    added = True
                except: pass

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        ignoreContains = None
        data = self._decode(url)

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                    title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
                    query = title
                    ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                else:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        if not self._query(query): return sources
        querySplit = query.split()

        url = urlparse.urljoin(self.base_link, self.search_link)

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 0 # Pages start at 0.
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = url % (page, urllib.quote_plus(query))
            dataPhp = client.request(urlNew)

            # The endpoint returns a PHP-style dump. Rewrite it into JSON.
            dataPhp = dataPhp.replace('<pre>', '').replace('</pre>', '')
            dataPhp = dataPhp[:-1] + '}'
            dataPhp = dataPhp.replace('array (', '{').replace('),', '},')
            dataPhp = dataPhp.replace('NULL,', 'null,').replace(' => ', ' : ').replace('\'', '"')

            # Remove trailing commas.
            dataPhp = re.sub(',[ \t\r\n]+}', '}', dataPhp)
            dataPhp = re.sub(',[ \t\r\n]+\]', ']', dataPhp)

            page += 1
            added = False

            result = json.loads(dataPhp)
            for key, value in result.iteritems():
                jsonName = value['title']
                jsonSize = value['size']
                jsonLink = value['magnet']
                try: jsonSeeds = int(value['seeders'])
                except: jsonSeeds = None

                # Metadata
                meta = metadata.Metadata(name=jsonName, title=title, titles=titles, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=jsonLink, size=jsonSize, seeds=jsonSeeds)

                # Ignore
                meta.ignoreAdjust(contains=ignoreContains)
                if meta.ignore(True): continue

                # Ignore Name
                # TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
                if not all(q in jsonName for q in querySplit): continue

                # Add
                sources.append({'url': jsonLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': jsonName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
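# TorrentProject's endpoint above returns a PHP var_export-style dump rather
# than JSON, so the loop rewrites it token by token before json.loads. A
# standalone sketch of that rewrite on a hypothetical payload (string keys,
# as the site returns):
import json
import re

def _phpToJson(dataPhp):
    dataPhp = dataPhp.replace('<pre>', '').replace('</pre>', '')
    dataPhp = dataPhp[:-1] + '}' # Replace the closing ")".
    dataPhp = dataPhp.replace('array (', '{').replace('),', '},')
    dataPhp = dataPhp.replace('NULL,', 'null,').replace(' => ', ' : ').replace('\'', '"')
    dataPhp = re.sub(',[ \t\r\n]+}', '}', dataPhp) # Remove trailing commas.
    dataPhp = re.sub(',[ \t\r\n]+\]', ']', dataPhp)
    return json.loads(dataPhp)

sample = "array ( 'abcd1234' => array ( 'title' => 'Example S01E01', 'seeders' => 12, ), )"
print(_phpToJson(sample)) # {'abcd1234': {'title': 'Example S01E01', 'seeders': 12}}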
def sources(self, url, hostDict, hostprDict):
    self.tSources = []
    try:
        if url == None: raise Exception()

        ignoreContains = None
        data = self._decode(url)

        show = 'tvshowtitle' in data
        type = self.type_shows if 'tvshowtitle' in data else self.type_movies

        if 'exact' in data and data['exact']:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            titles = None
            queries = [title]
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
            packExceptions = None
        else:
            title = data['tvshowtitle'] if show else data['title']
            titles = data['alternatives'] if 'alternatives' in data else None
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None
            packExceptions = None

            if show:
                # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
                    title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
                    queries = [title]
                    ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                else:
                    if pack:
                        queries = ['%s S%02d' % (title, season), '%s saison %d' % (title, season), '%s intégrale' % title]
                        packExceptions = [2] # Index of query where season pack file name detection should be ignored.
                    else:
                        queries = ['%s S%02dE%02d' % (title, season, episode)]
            else:
                queries = ['%s %d' % (title, year)]

        queries = [re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query) for query in queries]
        if not self._query(queries): return self.tSources

        url = urlparse.urljoin(self.base_link, self.search_link)
        queries = [urllib.quote(query) for query in queries] # quote_plus does not work.

        timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
        timerEnd = timerTimeout - 8
        timer = tools.Time(start=True)

        self.tThreadsSearches = []
        self.tThreadsLinks = []
        self.tLock = threading.Lock()

        for q in range(len(queries)):
            query = queries[q]
            packException = True if packExceptions and q in packExceptions else False
            thread = threading.Thread(target=self._search, args=(url, query, show, type, title, titles, year, season, episode, pack, packCount, packException, ignoreContains))
            self.tThreadsSearches.append(thread)
            thread.start()

        while True:
            if timer.elapsed() > timerTimeout: break
            if not any([t.is_alive() for t in self.tThreadsSearches]): break
            tools.Time.sleep(0.5)

        # First filter out all non-related links before doing the hash lookup.
        timerTimeout -= 2
        while True:
            if timer.elapsed() > timerTimeout: break
            if not any([t.is_alive() for t in self.tThreadsLinks]): break
            tools.Time.sleep(0.5)

        try: self.tLock.release()
        except: pass
        return self.tSources
    except:
        tools.Logger.error()
        try: self.tLock.release()
        except: pass
        return self.tSources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        pack = None

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s saison %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = title # Do not include year, otherwise there are few results.

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        type = self.type_shows if 'tvshowtitle' in data else self.type_movies
        url = urlparse.urljoin(self.base_link, self.search_link) % (type, urllib.quote_plus(query))

        html = BeautifulSoup(client.request(url))
        htmlTable = html.find_all('table', class_='cust-table')[0].find_all('tbody', recursive=False)[0]
        htmlRows = htmlTable.find_all('tr', recursive=False)

        self.tLock = threading.Lock()
        self.tLinks = [None] * len(htmlRows)

        threads = []
        for i in range(len(htmlRows)):
            urlTorrent = self.base_link + htmlRows[i].find_all('td', recursive=False)[0].find_all('a')[0]['href']
            threads.append(threading.Thread(target=self._link, args=(urlTorrent, i)))
        [thread.start() for thread in threads]

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)
        while timer.elapsed() < timerEnd and any([thread.is_alive() for thread in threads]):
            tools.Time.sleep(0.5)

        self.tLock.acquire() # Just lock in case the threads are still running.

        for i in range(len(htmlRows)):
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            htmlRow = htmlRows[i]
            htmlColumns = htmlRow.find_all('td', recursive=False)

            # Name
            htmlName = htmlColumns[0].getText().strip()
            if not 'tvshowtitle' in data:
                htmlName = re.sub(r"^(.*?)(TRUE|TRUEFRENCH|FRENCH|VOSTFR|VO)(.*)([0-9]{4})$", r"\1 \4 \2\3", htmlName)

            # Link
            htmlLink = self.tLinks[i]

            # Size
            htmlSize = htmlColumns[1].getText().strip().lower().replace(' mo', 'MB').replace(' go', 'GB').replace(' o', 'b')

            # Seeds
            try: htmlSeeds = int(htmlColumns[2].getText().strip())
            except: htmlSeeds = None

            # Metadata
            meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, packCount=packCount, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

            # Ignore
            if meta.ignore(False): continue

            # Add
            sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'metadata': meta, 'file': htmlName})

        self.tLock.release()
        return sources
    except:
        tools.Logger.error()
        return sources
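# For movies, the provider above reorders French release names so the year sits
# directly after the title, which helps the metadata parser. A small
# demonstration of that regex (the sample name is made up):
import re

name = 'Movie FRENCH BluRay 1080p 2019'
print(re.sub(r"^(.*?)(TRUE|TRUEFRENCH|FRENCH|VOSTFR|VO)(.*)([0-9]{4})$", r"\1 \4 \2\3", name))
# 'Movie  2019 FRENCH BluRay 1080p ' - the year is moved next to the title.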
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        year = int(data['year']) if 'year' in data and not data['year'] == None else None
        season = int(data['season']) if 'season' in data and not data['season'] == None else None
        episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
        pack = data['pack'] if 'pack' in data else False

        if 'tvshowtitle' in data:
            if pack: query = '%s %d' % (title, season)
            else: query = '%s S%02dE%02d' % (title, season, episode)
        else:
            query = '%s %d' % (title, year)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.search_link)

        page = 0 # Pages start at 0.
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start=True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            urlNew = url % (urllib.quote_plus(query), page)
            html = BeautifulSoup(client.request(urlNew))

            page += 1
            added = False

            htmlTable = html.find_all('table', id='searchResult')[0]
            htmlRows = htmlTable.find_all('tr', recursive=False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td')
                htmlInfo = htmlColumns[1]

                # Name
                htmlName = htmlInfo.find_all('div', class_='detName')[0].find_all('a')[0].getText().strip()

                # Size
                htmlSize = htmlInfo.find_all('font', class_='detDesc')[0].getText().replace('\xa0', ' ') # Replace non-breaking spaces.
                indexStart = htmlSize.find(', Size')
                indexEnd = htmlSize.find(', ', indexStart + 1)
                htmlSize = htmlSize[indexStart + 7:indexEnd]

                # Link
                htmlLink = ''
                htmlLinks = htmlInfo.find_all('a')
                for j in range(len(htmlLinks)):
                    link = htmlLinks[j]['href']
                    if link.startswith('magnet:'):
                        htmlLink = link
                        break

                # Seeds
                htmlSeeds = int(htmlColumns[2].getText())

                # Metadata
                meta = metadata.Metadata(name=htmlName, title=title, year=year, season=season, episode=episode, pack=pack, link=htmlLink, size=htmlSize, seeds=htmlSeeds)

                # Ignore
                if meta.ignore(True): continue

                # Add
                sources.append({'url': htmlLink, 'debridonly': False, 'direct': False, 'source': 'torrent', 'language': self.language[0], 'quality': meta.videoQuality(), 'info': meta.information(), 'file': htmlName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
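# The Pirate Bay packs the size into a comma-separated description sentence,
# which the parser above slices out by index. A small demonstration on a
# typical detDesc string (the sample text is hypothetical):
detDesc = 'Uploaded 05-16 2012, Size 701.83 MiB, ULed by example'
indexStart = detDesc.find(', Size')
indexEnd = detDesc.find(', ', indexStart + 1)
print(detDesc[indexStart + 7:indexEnd]) # '701.83 MiB'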
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url == None: raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        if 'exact' in data and data['exact']:
            query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = None
            season = None
            episode = None
            pack = False
            packCount = None
        else:
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            year = int(data['year']) if 'year' in data and not data['year'] == None else None
            season = int(data['season']) if 'season' in data and not data['season'] == None else None
            episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)

        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = urllib.quote_plus(query)

        category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

        pageLimit = tools.Settings.getInteger('scraping.providers.pages')
        pageCounter = 0
        page = 1 # Pages start at 1.
        added = False

        timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
        timer = tools.Time(start = True)

        while True:
            # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
            if timer.elapsed() > timerEnd: break

            pageCounter += 1
            if pageLimit > 0 and pageCounter > pageLimit: break

            urlNew = (self.base_link + self.search_link) % (query, category, page)
            html = BeautifulSoup(client.request(urlNew))

            htmlTable = html.find_all('div', class_ = 'content')[0].find_all('table', class_ = 'table-sm', recursive = False)[1]
            htmlRows = htmlTable.find_all('tr', recursive = False)

            page += 1
            added = False

            for i in range(len(htmlRows)):
                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td', recursive = False)

                # Name
                htmlName = htmlColumns[0].getText().strip()

                # Size
                htmlSize = htmlColumns[1].getText().strip()

                # Link
                htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[0]['href'].strip()
                htmlLink = re.search('\/torrent\/(.*)\/', htmlLink, re.IGNORECASE).group(1)
                htmlLink = (self.base_link + self.torrent_link) % htmlLink

                # Seeds
                htmlSeeds = int(htmlColumns[3].getText().strip())

                # Metadata
                meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

                # Ignore
                if meta.ignore(True): continue

                # Add
                sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
                added = True

            if not added: break # Last page reached with a working torrent.

        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False
		packCount = data['packcount'] if 'packcount' in data else None

		category = self.category_show if 'tvshowtitle' in data else self.category_movie

		if 'tvshowtitle' in data:
			if pack: query = '%s %d' % (title, season)
			else: query = '%s S%02dE%02d' % (title, season, episode)
		else:
			query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		querySplit = query.split()

		# Login
		if self.enabled and self.username and not self.username == '' and self.password and not self.password == '':
			login = self.base_link + self.login_link
			post = urllib.urlencode({'username' : self.username, 'password' : self.password, 'submit' : 'submit'})
			cookie = client.request(login, post = post, output = 'cookie', close = False)
			response = client.request(login, post = post, cookie = cookie, output = 'extended')
			headers = {'User-Agent' : response[3]['User-Agent'], 'Cookie' : response[3]['Cookie']}
		else:
			cookie = None
			headers = None

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False
		firstLink = None

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), urllib.quote_plus(category), page)
			data = client.request(urlNew, cookie = cookie)
			data = tools.Converter.jsonFrom(data)['torrentList']

			page += 1
			added = False

			for i in data:
				try:
					# File
					jsonId = i['fid']
					jsonFile = i['filename']

					# Name
					try: jsonName = i['name']
					except: jsonName = jsonFile

					# Link
					jsonLink = self.base_link + self.download_link
					jsonLink = jsonLink % (jsonId, jsonFile)
					if not headers == None: jsonLink += '|' + urllib.urlencode(headers)

					# Size
					try: jsonSize = i['size']
					except: jsonSize = None

					# Seeds
					try: jsonSeeds = i['seeders']
					except: jsonSeeds = None

					# Metadata
					meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = jsonLink, size = jsonSize, seeds = jsonSeeds)

					# Ignore
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : jsonLink, 'debridonly' : False, 'memberonly' : True, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
					added = True
				except:
					tools.Logger.error()

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		tools.Logger.error()
		return sources
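# The '|' + urlencode(headers) suffix above follows the common Kodi convention of appending
# request headers to a playable URL after a pipe character. A hedged sketch (the helper name
# and sample values are hypothetical):
import urllib
def _exampleAuthenticatedLink(link, headers):
	if headers: link += '|' + urllib.urlencode(headers)
	return link

# _exampleAuthenticatedLink('http://example.com/file.mkv', {'Cookie' : 'uid=1'})
# returns 'http://example.com/file.mkv|Cookie=uid%3D1'.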
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					# Only this format works for season packs.
					# Does not support individual episodes.
					if pack:
						query = '%s S%02d' % (title, season)
					else:
						pack = True
						query = '%s сезон %d' % (title, season) # "сезон" is Russian for "season".
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0 # Pages start at 0.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), page)
			html = client.request(urlNew)

			# There is a quote missing in the page's HTML.
			# Replace it and add a custom class for easy identification.
			html = html.replace('style="width:1095px; class=" lista">', 'style="width:1095px;" class="gaia lista">')

			htmlLower = html.lower()
			start = htmlLower.index('class="gaia')
			start = htmlLower.index('</tr>', start) + 5
			end = htmlLower.index('</table>', start) + 8
			html = html[start : end]
			html = html.replace('\n', '').replace('\r', '')
			html = html.replace('</TR>', '</tr>')
			htmlRows = html.split('</tr>')

			page += 1
			added = False

			for htmlRow in htmlRows:
				# Link
				try: htmlLink = re.search('(magnet:.*?)>', htmlRow, re.IGNORECASE).group(1)
				except: continue

				# Name
				try: htmlName = ' ' + re.search('details\.php.*?>(.*?)<', htmlRow, re.IGNORECASE).group(1).strip()
				except: htmlName = ''

				# Category
				try: htmlName += ' ' + re.search('border=0\s+alt="(.*?)"', htmlRow, re.IGNORECASE).group(1).strip()
				except: pass

				# Size
				try: htmlSize = re.search('>(\d+\.+\d+ [g|m]b)<', htmlRow, re.IGNORECASE).group(1).strip()
				except: htmlSize = None

				# Seeds
				try: htmlSeeds = int(re.search('>(\d+)<', htmlRow, re.IGNORECASE).group(1).strip())
				except: htmlSeeds = None

				htmlName = re.sub('[^A-Za-z0-9\s]', ' ', htmlName)
				htmlName = re.sub('\s\s+', ' ', htmlName).strip()

				# Otherwise if 3D appears multiple times in the name, it will be ignored.
				# Eg: 3D Avatar 3D 2009 1080p BluRay 3D
				try:
					htmlIndex = htmlName.lower().index('3d')
					htmlName = htmlName.replace('3D', '').replace('3d', '') # Remove both casings.
					if htmlIndex >= 0: htmlName += ' 3D'
				except: pass

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName, 'pack' : pack})
				added = True

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
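# A sketch of the 3D handling above: collapse repeated "3D" tokens into a single trailing
# token so names like "3D Avatar 3D 2009" are not ignored for containing the keyword more
# than once. The helper name and the sample name are hypothetical.
import re
def _exampleNormalise3D(name):
	if re.search('3d', name, re.IGNORECASE):
		name = re.sub('3d', '', name, flags = re.IGNORECASE)
		name = re.sub('\s\s+', ' ', name).strip() + ' 3D'
	return name

assert _exampleNormalise3D('3D Avatar 3D 2009 1080p') == 'Avatar 2009 1080p 3D'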
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0 # Pages start at 0.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		#while True:
		while page == 0: # KickassTorrents currently has a problem viewing any page other than page 1 while sorted by seeders. Only view the first page.
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query))
			html = client.request(urlNew)

			# KickassTorrents has major mistakes in its HTML. Manually remove parts to create new HTML.
			indexStart = html.find('<', html.find('<!-- Start of Loop -->') + 1)
			indexEnd = html.rfind('<!-- End of Loop -->')
			html = html[indexStart : indexEnd]
			html = html.replace('<div class="markeredBlock', '</div><div class="markeredBlock') # torrentname div tag not closed.
			html = html.replace('</span></td>', '</td>') # Dangling </span> closing tag.
			html = BeautifulSoup(html)

			page += 1
			added = False

			# Do not search further down the tree (just the direct children).
			htmlRows = html.find_all('tr', recursive = False)

			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				if 'firstr' in htmlRow['class']: continue # Header row.
				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[0]

				# Name
				htmlName = htmlInfo.find_all('a', class_ = 'cellMainLink')[0].getText().strip()

				# Size
				htmlSize = htmlColumns[1].getText().replace(u'\xa0', ' ') # Replace non-breaking spaces.

				# Link
				htmlLink = ''
				htmlLinks = htmlInfo.find_all('a', class_ = 'icon16')
				for j in range(len(htmlLinks)):
					link = htmlLinks[j]
					if link.has_attr('href'):
						link = link['href']
						if 'magnet' in link:
							htmlLink = urllib.unquote(re.findall('(magnet.*)', link)[0]) # Starts with a redirection URL, eg: https://mylink.bz/?url=magnet...
							break

				# Seeds
				htmlSeeds = int(htmlColumns[3].getText())

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
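# A reduced sketch of the HTML repair above: slice out only the rows between the two loop
# markers and let BeautifulSoup parse just that fragment. The marker strings are the ones
# referenced in the code above; the helper name is hypothetical.
from bs4 import BeautifulSoup
def _exampleRepairHtml(html):
	indexStart = html.find('<', html.find('<!-- Start of Loop -->') + 1)
	indexEnd = html.rfind('<!-- End of Loop -->')
	return BeautifulSoup(html[indexStart : indexEnd])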
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not self.enabled: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			query = '%s S%02dE%02d' % (title, season, episode) if 'tvshowtitle' in data else '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		if not self.streamQuality == None and not self.streamQuality == '' and not self.streamQuality == 'sd':
			query += ' %s' % self.streamQuality
		if not self.streamLanguage == None and not self.streamLanguage == '' and not self.streamLanguage == 'un':
			query += ' lang:%s' % self.streamLanguage
		query = urllib.quote_plus(query)

		hostDict = hostprDict + hostDict

		# Split the stream limit into batches of streamIncrease, with the last batch holding the remainder.
		iterations = self.streamLimit / float(self.streamIncrease)
		if iterations < 1:
			last = self.streamLimit
			iterations = 1
		else:
			difference = iterations - math.floor(iterations)
			last = self.streamIncrease if difference == 0 else int(difference * self.streamIncrease)
			iterations = int(math.ceil(iterations))

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		# Use a separate variable for the API rotation, otherwise the batch size in "last" computed above is overwritten.
		apiLast = settings.Prontv.apiLast()
		api = settings.Prontv.apiNext()
		first = apiLast

		for type in self.types:
			for offset in range(iterations):
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
				if timer.elapsed() > timerEnd: break
				if len(sources) >= self.streamLimit: break

				searchCount = last if offset == iterations - 1 else self.streamIncrease
				searchFrom = (offset * self.streamIncrease) + 1

				results = self.retrieve(type, api, query, searchCount, searchFrom)

				try:
					while self.limit(results):
						apiLast = settings.Prontv.apiLast()
						if first == apiLast: break
						api = settings.Prontv.apiNext()
						results = self.retrieve(type, api, query, searchCount, searchFrom)
					if self.limit(results):
						interface.Dialog.notification(title = 35261, message = interface.Translation.string(33952) + ' (' + str(results['fetchedtoday']) + ' ' + interface.Translation.string(35222) + ')', icon = interface.Dialog.IconWarning)
						tools.Time.sleep(2)
						return sources
				except: pass

				results = results['result']
				added = False
				for result in results:
					# Information
					jsonName = result['title']
					jsonSize = result['sizeinternal']
					jsonExtension = result['extension']
					jsonLanguage = result['lang']
					jsonHoster = result['hostername'].lower()
					jsonLink = result['hosterurls'][0]['url']

					# Ignore Hosters
					if not jsonHoster in hostDict: continue

					# Ignore Non-Videos
					# Alluc often has other files, such as SRT, also listed as streams.
					if not jsonExtension == None and not jsonExtension == '' and not tools.Video.extensionValid(jsonExtension): continue

					# Metadata
					meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, link = jsonLink, size = jsonSize)

					# Ignore
					if meta.ignore(False): continue

					# Add
					sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'memberonly' : True, 'source' : jsonHoster, 'language' : jsonLanguage, 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
					added = True

				if not added: break

		return sources
	except:
		return sources
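# The paging arithmetic above splits streamLimit into batches of streamIncrease, with the
# final batch holding the remainder. A worked sketch (helper name and numbers hypothetical):
import math
def _exampleBatches(limit, increase):
	iterations = limit / float(increase)
	if iterations < 1: return 1, limit
	difference = iterations - math.floor(iterations)
	last = increase if difference == 0 else int(difference * increase)
	return int(math.ceil(iterations)), last

# _exampleBatches(250, 100) returns (3, 50): two full batches of 100, then a last batch of 50.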
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		premiumize = debrid.Premiumize()
		if not premiumize.accountValid(): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
		timer = tools.Time(start = True)

		threads = []
		ids = []
		items = premiumize._itemsTransfer()
		for item in items:
			if item['transfer']['progress']['completed']['value'] == 1: # Only finished downloads.
				id = item['id']
				if not id in ids:
					meta = metadata.Metadata(name = item['name'], title = title, year = year, season = season, episode = episode, pack = pack)
					if not meta.ignore(size = False):
						ids.append(id)
						threads.append(threading.Thread(target = self._item, args = (id, season, episode)))

		[thread.start() for thread in threads]
		while True:
			if timer.elapsed() > timerEnd: break
			if all([not thread.is_alive() for thread in threads]): break
			time.sleep(0.5)

		try: self.mutex.acquire()
		except: pass
		items = self.items
		try: self.mutex.release()
		except: pass

		for item in items:
			jsonName = item['video']['name']
			jsonLink = item['video']['link']
			jsonSize = item['video']['size']['bytes']

			# RAR Files
			if jsonLink.lower().endswith('.rar'): continue

			# Metadata
			meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, size = jsonSize)

			# Add
			sources.append({'url' : jsonLink, 'premium' : True, 'debridonly' : True, 'direct' : True, 'memberonly' : True, 'source' : 'Premiumize', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'info' : meta.information(), 'file' : jsonName})

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		querySplit = query.split()

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0 # Pages start at 0.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), (page * 20))
			data = client.request(urlNew)

			page += 1
			added = False

			result = json.loads(data)['results']
			for i in result:
				jsonName = i['title']
				jsonSize = i['size']
				jsonLink = i['magnet']
				try: jsonSeeds = int(i['swarm']['seeders'])
				except: jsonSeeds = None

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = jsonLink, size = jsonSize, seeds = jsonSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Ignore Name
				# TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
				if not all(q in jsonName for q in querySplit): continue

				# Add
				sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
				added = True

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
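# The keyword filter above requires every word of the query to appear verbatim in the result
# name, which weeds out season packs and foreign titles. Note that the check is case-sensitive.
# A hedged sketch (helper name hypothetical):
def _exampleMatches(name, query):
	return all(q in name for q in query.split())

# _exampleMatches('Interstellar 2014 1080p', 'Interstellar 2014') returns True.
# _exampleMatches('Interstellar 2014 1080p', 'interstellar 2014') returns False (case differs).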
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if not tools.System.developers(): raise Exception()
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies
		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1 # Pages start at 1.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), category, page)
			data = client.request(urlNew)

			# RarBg's HTML is not valid and a total mess, probably to make it hard for scrapers.
			# First try to parse the HTML. If that fails, extract only the table from the markup and construct new HTML.
			# Sometimes both fail; it seems RarBg randomizes the corruption in its HTML.
			htmlRows = []
			try:
				html = BeautifulSoup(data)
				htmlTable = html.find_all('table', class_ = 'lista2t')[0]
				htmlRows = htmlTable.find_all('tr', class_ = 'lista2', recursive = False)
				if len(htmlRows) == 0: raise Exception()
			except:
				start = data.find('lista2t')
				if start < 0: raise Exception()
				start += 7
				start = data.find('lista2', start)
				start = data.find('>', start) + 1
				end = data.find('<tr><td align="center" colspan="2">', start)
				data = '<html><body><table class="lista2t"><tr class="lista2">' + data[start : end] + '</table></body></html>'
				html = BeautifulSoup(data)
				htmlTable = html.find_all('table', class_ = 'lista2t')[0]
				htmlRows = htmlTable.find_all('tr', class_ = 'lista2', recursive = False)

			page += 1
			added = False

			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[1]

				# Name
				htmlName = htmlInfo.find_all('a')[0].getText().strip()

				# 3D
				htmlImages = htmlInfo.find_all('img')
				for j in range(len(htmlImages)):
					try:
						if htmlImages[j]['src'].endswith('3d.png'):
							htmlName += ' 3D'
							break
					except: pass

				# Size
				htmlSize = htmlColumns[3].getText().strip()

				# Link
				# TODO: If the hash cannot be retrieved from the mouse-over image, fall back to the .torrent file.
				try:
					htmlLink = htmlInfo.find_all('a')[0]['onmouseover']
					start = htmlLink.find('/over/')
					if start < 0: raise Exception()
					start += 6
					end = htmlLink.find('.', start)
					htmlLink = htmlLink[start : end]
					if not len(htmlLink) == 40: raise Exception() # SHA1 info hashes are 40 hex characters.
					htmlLink = self.magnet_link % (htmlLink, htmlName.replace(' ', ''))
				except:
					try:
						htmlLink = htmlInfo.find_all('a')[0]['href']
						start = htmlLink.find('torrent/')
						if start < 0: raise Exception()
						start += 8
						htmlLink = htmlLink[start:]
						if len(htmlLink) == 0: raise Exception()
						htmlLink = self.torrent_link % (htmlLink, htmlName.replace(' ', ''))
					except: continue

				# Seeds
				htmlSeeds = int(htmlColumns[4].getText().strip())

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
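# A sketch of the hash extraction above: the 40-character SHA1 info hash sits between '/over/'
# and the next '.' in the mouse-over preview URL. The helper name and the sample attribute
# value are hypothetical.
import re
def _exampleHash(onmouseover):
	match = re.search('/over/([a-f0-9]{40})\.', onmouseover, re.IGNORECASE)
	return match.group(1) if match else None

# _exampleHash("over('<img src=\"/over/aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd.jpg\">')")
# returns 'aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd'.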
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			type = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			type = 'tv' if 'tvshowtitle' in data else 'movie'
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), page)
			html = BeautifulSoup(client.request(urlNew))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'table')[0]
			htmlRows = htmlTable.find_all('td', class_ = 'x-item')

			for i in range(0, len(htmlRows)):
				try:
					htmlRow = htmlRows[i]

					# Name
					htmlName = htmlRow.find_all('a', class_ = 'title')[0]['title'].strip()

					# Size
					htmlSize = htmlRow.find_all('div', class_ = 'tail')[0].getText().replace('\n', '').replace('\r', '').replace(u'\xa0', ' ').strip() # Replace non-breaking spaces.
					htmlSize = re.search('.*[sS]ize:(.*)[dD]ownloads.*', htmlSize, re.IGNORECASE)
					if htmlSize: htmlSize = htmlSize.group(1).strip()
					else: htmlSize = None

					# Link
					htmlLink = htmlRow.find_all('div', class_ = 'tail')[0].find_all('a', class_ = 'title')[0]['href'].strip()

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = 1)

					# Ignore
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True
				except: pass

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	found = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), page)
			html = BeautifulSoup(client.request(urlNew))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'list')[0]
			htmlRows = htmlTable.find_all('tr', recursive = False)

			for i in range(1, len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td', recursive = False) # Use direct children and no further.

				# Name
				htmlName = htmlColumns[0].find_all('a')[0].getText()

				# Size
				htmlSize = htmlColumns[3].getText()

				# Link
				htmlLink = htmlColumns[0].find_all('a')[0]['href']

				# Age
				htmlAge = htmlColumns[1].getText().lower()
				if 'day' in htmlAge: htmlAge = int(htmlAge.replace('days', '').replace('day', '').strip())
				elif 'week' in htmlAge: htmlAge = int(htmlAge.replace('weeks', '').replace('week', '').strip()) * 7
				elif 'month' in htmlAge: htmlAge = int(htmlAge.replace('months', '').replace('month', '').strip()) * 30
				elif 'year' in htmlAge: htmlAge = int(htmlAge.replace('years', '').replace('year', '').strip()) * 365
				else: htmlAge = 0

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, age = htmlAge)
				meta.mIgnoreLength *= 10 # Otherwise too restrictive for very long usenet titles.

				# Ignore
				if meta.ignore(False): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: # Last page reached with a working link.
				break

		return sources
	except:
		return sources
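# A table-driven sketch of the age parsing above, converting "3 weeks"-style strings into
# days. The unit factors match the code above; the helper name is hypothetical.
def _exampleAgeDays(age):
	for unit, days in (('year', 365), ('month', 30), ('week', 7), ('day', 1)):
		if unit in age:
			return int(age.replace(unit + 's', '').replace(unit, '').strip()) * days
	return 0

assert _exampleAgeDays('3 weeks') == 21
assert _exampleAgeDays('1 day') == 1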
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		category = self.category_shows if 'tvshowtitle' in data else self.category_movies
		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (page, urllib.quote_plus(query), category)
			# For some reason Zooqle returns 404 even though the response has a body.
			# This is probably a bug on Zooqle's server and the error should just be ignored.
			html = BeautifulSoup(client.request(urlNew, ignoreErrors = 404))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'table-torrents')[0]
			htmlRows = htmlTable.find_all('tr', recursive = False)

			for i in range(1, len(htmlRows)): # First row is the header.
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[1]
				htmlMeta = htmlInfo.find_all('div', recursive = False)[0]

				# Name
				htmlName = htmlInfo.find_all('a', recursive = False)[0].getText().strip()

				# Size
				htmlSize = htmlColumns[3].getText()

				# Link
				htmlLink = ''
				htmlLinks = htmlColumns[2].find_all('a')
				for j in range(len(htmlLinks)):
					link = htmlLinks[j]['href']
					if link.startswith('magnet:'):
						htmlLink = link
						break

				# Seeds
				htmlSeeds = htmlColumns[5].find_all('div', recursive = False)[0]['title']
				indexStart = htmlSeeds.find(':')
				if indexStart > 0:
					indexStart += 1
					indexEnd = htmlSeeds.find('|', indexStart)
					if indexEnd > 0: htmlSeeds = htmlSeeds[indexStart : indexEnd]
					else: htmlSeeds = htmlSeeds[indexStart:]
					htmlSeeds = int(htmlSeeds.replace(',', '').replace('.', '').strip())
				else:
					htmlSeeds = None

				# Quality & 3D
				try:
					htmlQuality = htmlMeta.find_all('span', class_ = 'hidden-xs')[0].getText().lower().strip()
					if 'ultra' in htmlQuality: htmlQuality = '4K'
					elif 'std' in htmlQuality: htmlQuality = 'SD'
					elif 'med' in htmlQuality or 'low' in htmlQuality: htmlQuality = 'CAM'
					htmlName += ' ' + htmlQuality
				except: pass

				# Audio
				try: htmlName += ' ' + htmlMeta.find_all('span', {'title' : 'Audio format'})[0].getText()
				except: pass

				# Languages
				try: htmlLanguages = htmlMeta.find_all('span', {'title' : 'Detected languages'})[0].getText().split(',')
				except: htmlLanguages = None

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguages)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: # Last page reached with a working torrent.
				break

		return sources
	except:
		return sources
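# A sketch of the seeder-count parsing above, assuming a "Seeders: 1,234 | Leechers: 56"
# style title attribute. The helper name and the sample string are hypothetical.
def _exampleSeeds(tooltip):
	indexStart = tooltip.find(':')
	if indexStart < 0: return None
	indexEnd = tooltip.find('|', indexStart + 1)
	value = tooltip[indexStart + 1 : indexEnd] if indexEnd > 0 else tooltip[indexStart + 1:]
	return int(value.replace(',', '').replace('.', '').strip())

assert _exampleSeeds('Seeders: 1,234 | Leechers: 56') == 1234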
def sources(self, url, hostDict, hostprDict):
	# NB: The same object of the provider is used for both normal episodes and season packs. Make sure it is cleared from the previous run.
	self.items = []
	sources = []
	try:
		if url == None: raise Exception()
		if not debrid.OffCloud().accountValid(): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = 0
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
		timer = tools.Time(start = True)

		threads = []
		self.ids = []
		threads.append(threading.Thread(target = self._items, args = (debrid.OffCloud.CategoryCloud, title, year, season, episode, pack)))
		threads.append(threading.Thread(target = self._items, args = (debrid.OffCloud.CategoryInstant, title, year, season, episode, pack)))
		[thread.start() for thread in threads]
		while True:
			if timer.elapsed() > timerEnd: break
			if all([not thread.is_alive() for thread in threads]): break
			time.sleep(0.5)

		try: self.mutex.acquire()
		except: pass
		items = self.items
		try: self.mutex.release()
		except: pass

		for item in items:
			try:
				jsonName = item['video']['name']
				try:
					# Sometimes metadata, like quality, is only in the folder name, not the file name.
					if not item['name'] == jsonName: jsonName = item['name'] + ' - ' + jsonName
				except: pass
				jsonLink = item['video']['link']
				try: jsonSize = item['size']['bytes']
				except: jsonSize = None

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, size = jsonSize, pack = pack, packCount = packCount)

				# Add
				sources.append({'url' : jsonLink, 'premium' : True, 'debridonly' : True, 'direct' : True, 'memberonly' : True, 'source' : 'OffCloud', 'language' : self.language[0], 'quality' : meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
			except: pass

		return sources
	except:
		return sources