def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: return sources
		if not tools.Settings.getBoolean(self.prefix + 'enabled'): return sources

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		type = 'episode' if 'tvshowtitle' in data else 'movie'

		if type == 'movie': path = self._locationMovies()
		elif type == 'episode': path = self._locationTvshows()
		if not path.endswith('\\') and not path.endswith('/'): path += '/' # Must end with a slash for tools.File.exists.
		if not tools.File.exists(path): return sources

		if 'exact' in data and data['exact']:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		else:
			title = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if type == 'episode' else '%s %s' % (data['title'], data['year'])
		title = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', title)

		files = self._find(path, title)
		for file in files:
			# Python sometimes uses a backslash instead of a forward slash with os.path. This causes duplicate files not to be filtered out, due to a "different" path.
			file = file.replace('\\\\', '/').replace('\\', '/')
			meta = metadata.Metadata()
			meta.loadHeadersFile(file, timeout = 30)
			sources.append({'source': '0', 'quality': meta.videoQuality(), 'language': self.language[0], 'url': file, 'file': os.path.basename(file), 'local': True, 'direct': True, 'debridonly': False, 'metadata': meta})
	except:
		pass
	return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['title']
			titles = None
			year = None
		else:
			title = data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		url = urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(query)

		html = BeautifulSoup(client.request(url))
		htmlTable = html.find_all('div', id = 'Torrents')[0].find_all('div', class_ = 'DownloadFlags')[0]
		htmlRows = htmlTable.find_all('a', recursive = False) # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

		for i in range(1, len(htmlRows)): # Skip the first entry (header row).
			try:
				htmlRow = htmlRows[i]
				htmlData = htmlRow['onmouseover'].split(',')
				if not len(htmlData) == 11: continue

				# Name
				htmlName = htmlData[5].strip().strip("'")

				# Link
				htmlLink = htmlRow['href'].strip()
				htmlLink = re.search('\/.*\/(.*)\.aspx', htmlLink).group(1).replace('-', '.')
				htmlLink = urlparse.urljoin(self.base_link, self.download_link) % urllib.quote_plus(htmlLink)

				# Size
				htmlSize = htmlData[7].strip().strip("'")

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, link = htmlLink, size = htmlSize, seeds = 1)

				# Ignore
				meta.mIgnoreLength = 10
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
			except:
				pass

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			titleYear = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titleYear = '%s %s' % (title, str(data['year']))
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None

		query = data['imdb'] if 'imdb' in data and not data['imdb'] == None else title
		url = urlparse.urljoin(self.base_link, self.search_link) % query

		result = json.loads(client.request(url))
		movie = result['data']['movies'][0]
		name = movie['title_long'] + ' '
		torrents = movie['torrents']

		for torrent in torrents:
			quality = torrent['quality']
			if quality.lower() == '3d': quality += ' HD1080'
			jsonName = name + quality
			jsonSize = torrent['size_bytes']
			jsonHash = torrent['hash']
			jsonLink = network.Container(jsonHash).torrentMagnet(title = titleYear)
			try: jsonSeeds = int(torrent['seeds'])
			except: jsonSeeds = None

			# Metadata
			meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, link = jsonLink, size = jsonSize, seeds = jsonSeeds)
			jsonLink = network.Container(jsonHash).torrentMagnet(title = meta.title(extended = True))

			# Ignore
			if meta.ignore(False): continue

			# Add
			sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})

		return sources
	except:
		return sources
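# --- Illustrative sketch (not part of the provider above) ---
# The loop above builds a magnet link from the torrent's info-hash via
# network.Container(...).torrentMagnet(...). A magnet URI of this kind can be
# assembled directly from the hash and a display name. This standalone sketch
# uses only the standard library; magnet_from_hash is a hypothetical helper,
# not the add-on's API.
import urllib

def magnet_from_hash(info_hash, name, trackers = ()):
	# xt = exact topic (BitTorrent info-hash), dn = display name, tr = tracker.
	link = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, urllib.quote_plus(name))
	for tracker in trackers:
		link += '&tr=%s' % urllib.quote_plus(tracker)
	return link

# Example: magnet_from_hash('0123456789abcdef0123456789abcdef01234567', 'Movie 2009')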
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		# Get a token. It expires every 15 minutes, but just request a token on every search. The old token is returned if the previous one has not yet expired.
		url = network.Networker.linkJoin(self.base_link, self.api_link, self.token_link)
		result = json.loads(client.request(url))
		token = result['token']

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False
		category = self.category_shows if 'tvshowtitle' in data else self.category_movies

		if 'tvshowtitle' in data:
			if pack: query = '%s %d' % (title, season)
			else: query = '%s S%02dE%02d' % (title, season, episode)
		else:
			query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = network.Networker.linkJoin(self.base_link, self.api_link, self.search_link) % (token, urllib.quote_plus(query), category)
		result = json.loads(client.request(url))
		torrents = result['torrent_results']

		for torrent in torrents:
			jsonName = torrent['title']
			jsonSize = torrent['size']
			jsonSeeds = torrent['seeders']
			jsonLink = torrent['download']

			# Metadata
			meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, link = jsonLink, size = jsonSize, seeds = jsonSeeds)

			# Ignore
			if meta.ignore(False): continue

			# Add
			sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'info' : meta.information(), 'file' : jsonName})

		return sources
	except:
		return sources
def _items(self, category, title, titles, year, season, episode, pack):
	try:
		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
		timer = tools.Time(start = True)

		items = offcloud.Core().items(category = category)

		try: self.mutex.acquire()
		except: pass

		threads = []
		for item in items:
			if item['status'] == offcloud.Core.StatusFinished: # Only finished downloads.
				id = item['id']
				if not id in self.ids:
					meta = metadata.Metadata(name = item['name'], title = title, titles = titles, year = year, season = season, episode = episode, pack = pack)
					if not meta.ignore(size = False):
						self.ids.append(id)
						if category == offcloud.Core.CategoryInstant:
							self.items.append(item)
						else:
							threads.append(threading.Thread(target = self._item, args = (category, id, season, episode)))

		try: self.mutex.release()
		except: pass

		if len(threads) > 0:
			[thread.start() for thread in threads]
			while True:
				if timer.elapsed() > timerEnd: break
				if all([not thread.is_alive() for thread in threads]): break
				time.sleep(0.5)
	except:
		tools.Logger.error()
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		query = urllib.quote_plus(query)
		if not self._query(query): return sources

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1 # Pages start at 1.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = (self.base_link + self.search_link) % (query, page)
			html = BeautifulSoup(client.request(urlNew))

			htmlTable = html.find_all('table', class_ = 'search-table')[0]
			htmlRows = htmlTable.find_all('tr', recursive = False)

			page += 1
			added = False

			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td', recursive = False)

				# Name
				htmlName = htmlColumns[0].getText().strip()

				# Size
				htmlSize = htmlColumns[2].getText().strip()

				# Link
				htmlLink = htmlColumns[0].find_all('a')[0]['href'].strip()
				htmlLink = network.Container(htmlLink).torrentMagnet(title = title)

				# Seeds
				htmlSeeds = int(htmlColumns[3].getText().strip())

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
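# --- Illustrative sketch (not part of the provider above) ---
# Several providers in this section share the same paging pattern: keep
# requesting result pages until the page limit is hit, the scrape timeout is
# near, or a page yields no usable links. A minimal standalone version of
# that control flow, where fetch_page() is a hypothetical stand-in for the
# provider's request-and-parse step:
import time

def paged_search(fetch_page, page_limit, timeout, reserve = 8):
	start = time.time()
	results = []
	page = 1
	while True:
		if time.time() - start > timeout - reserve: break # Leave time to return what was found.
		if page_limit > 0 and page > page_limit: break
		items = fetch_page(page) # Hypothetical: returns the parsed links of one page.
		if not items: break # Empty page. The last page was reached.
		results.extend(items)
		page += 1
	return results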
def add(result, title):
	link = result['file'].encode('utf-8')
	name = os.path.basename(link)

	try: videoQuality = int(result['streamdetails']['video'][0]['width'])
	except: videoQuality = -1

	threshold = 20 # Some videos are a bit smaller than the nominal width.
	if videoQuality >= 8192 - threshold: videoQuality = 'HD8K'
	elif videoQuality >= 6144 - threshold: videoQuality = 'HD6K'
	elif videoQuality >= 3840 - threshold: videoQuality = 'HD4K'
	elif videoQuality >= 2048 - threshold: videoQuality = 'HD2K'
	elif videoQuality >= 1920 - threshold: videoQuality = 'HD1080'
	elif videoQuality >= 1280 - threshold: videoQuality = 'HD720'
	else: videoQuality = 'SD'

	try: videoCodec = result['streamdetails']['video'][0]['codec']
	except: videoCodec = None
	try: video3D = len(result['streamdetails']['video'][0]['stereomode']) > 0
	except: video3D = None
	try: audioChannels = result['streamdetails']['audio'][0]['channels']
	except: audioChannels = None
	try: audioCodec = result['streamdetails']['audio'][0]['codec']
	except: audioCodec = None
	try: subtitle = len(result['streamdetails']['subtitle']) > 0
	except: subtitle = None

	try:
		file = control.openFile(link)
		size = file.size()
		file.close()
	except:
		size = None

	try:
		meta = metadata.Metadata(name = name, title = title, link = link, size = size)
		meta.setVideoQuality(videoQuality)
		meta.setVideoCodec(videoCodec)
		meta.setVideo3D(video3D)
		meta.setAudioChannels(audioChannels)
		meta.setAudioCodec(audioCodec)
		meta.setSubtitlesSoft(subtitle)
		# Append inside the try, so that a failed metadata object is not referenced below.
		sources.append({'source': '0', 'quality': meta.videoQuality(), 'language': self.language[0], 'url': link, 'file': name, 'local': True, 'direct': True, 'debridonly': False, 'metadata': meta})
	except:
		pass
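# --- Illustrative sketch (not part of the library code above) ---
# add() maps the reported frame width to a quality label, subtracting a small
# threshold because encodes are often cropped a few pixels narrower than the
# nominal resolution (a 3834-pixel-wide file should still count as HD4K).
# The same mapping as a standalone function:

def quality_from_width(width, threshold = 20):
	for minimum, label in ((8192, 'HD8K'), (6144, 'HD6K'), (3840, 'HD4K'), (2048, 'HD2K'), (1920, 'HD1080'), (1280, 'HD720')):
		if width >= minimum - threshold: return label
	return 'SD'

# Examples: quality_from_width(3834) == 'HD4K', quality_from_width(1916) == 'HD1080'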
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		querySplit = query.split()

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0 # Pages start at 0.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), (page * 20))
			data = client.request(urlNew)

			page += 1
			added = False

			result = json.loads(data)['results']
			for i in result:
				jsonName = i['title']
				jsonSize = i['size']
				jsonLink = i['magnet']
				try: jsonSeeds = int(i['swarm']['seeders'])
				except: jsonSeeds = None

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = jsonLink, size = jsonSize, seeds = jsonSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Ignore Name
				# TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
				if not all(q in jsonName for q in querySplit): continue

				# Add
				sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
				added = True

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		category = self.category_shows if 'tvshowtitle' in data else self.category_movies
		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1 # Pages start at 1.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (category, urllib.quote_plus(query), page)
			html = client.request(urlNew)

			# The HTML is corrupt. Try to fix it manually.
			try:
				indexStart = html.find('class="table2"')
				indexStart = html.find('<tr bgcolor', indexStart)
				indexEnd = html.find('search_stat', indexStart)
				html = html[indexStart : indexEnd]
				indexEnd = html.rfind('</td>') + 5
				html = html[:indexEnd]
				html = html.replace('</a></td>', '</td>')
				html = '<table>' + html + '</tr></table>'
			except:
				pass

			html = BeautifulSoup(html)

			page += 1
			added = False

			htmlRows = html.find_all('tr')
			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[0].find_all('div')[0]

				# Name
				htmlName = htmlInfo.find_all('a', recursive = False)[1].getText().strip()

				# Link
				htmlHash = htmlInfo.find_all('a', recursive = False)[0]['href']
				indexStart = htmlHash.find('torrent/')
				if indexStart < 0: continue
				indexStart += 8
				indexEnd = htmlHash.find('.torrent', indexStart)
				if indexEnd < 0: continue
				htmlHash = htmlHash[indexStart : indexEnd]
				if not tools.Hash.valid(htmlHash): continue
				htmlLink = network.Container(htmlHash).torrentMagnet(title = query)

				# Size
				htmlSize = htmlColumns[2].getText().strip()

				# Seeds
				htmlSeeds = int(htmlColumns[3].getText().replace(',', '').replace(' ', ''))

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
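# --- Illustrative sketch (not part of the provider above) ---
# The provider above slices the broken markup between two known anchors and
# re-wraps it in a <table> before handing it to BeautifulSoup. The general
# technique, under the assumption that a stable start and end marker exist in
# the page; repair_html is a hypothetical helper:

def repair_html(html, start_marker, end_marker, wrapper = 'table'):
	start = html.find(start_marker)
	if start < 0: return html # Markers absent. Parse as-is.
	end = html.find(end_marker, start)
	if end < 0: end = len(html)
	# Re-wrap the fragment so the parser sees a well-formed subtree.
	return '<%s>%s</%s>' % (wrapper, html[start : end], wrapper)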
def sources(self, url, hostDict, hostprDict):
	self.tSources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		query = urllib.quote_plus(query)
		if not self._query(query): return self.tSources

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0 # Pages start at 1, but the counter is incremented before the first request.

		timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
		timerEnd = timerTimeout - 8
		timer = tools.Time(start = True)

		threads = []
		self.tLock = threading.Lock()

		while True:
			try:
				# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
				if timer.elapsed() > timerEnd: break

				added = False

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit: break

				html = BeautifulSoup(client.request(url % (query, pageCounter)))

				htmlTable = html.find_all('table', class_ = 'results')
				htmlTable = htmlTable[len(htmlTable) - 1]
				htmlRows = htmlTable.find_all('tr')

				for i in range(1, len(htmlRows)):
					try:
						htmlRow = htmlRows[i]
						htmlColumns = htmlRow.find_all('td', recursive = False) # Use the direct children and no further.

						# Name
						htmlName = htmlColumns[0].find_all('a')[0].getText()

						# Link
						htmlLink = urlparse.urljoin(self.base_link, htmlColumns[0].find_all('a')[0]['href'])

						# Size
						htmlSize = htmlColumns[1].getText()

						# Age
						htmlAge = htmlColumns[3].getText()
						htmlAge = int(convert.ConverterDuration(htmlAge).value(convert.ConverterDuration.UnitDay))

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, age = htmlAge)

						# Ignore
						meta.ignoreAdjust(contains = ignoreContains, length = 0.3)
						if meta.ignore(False): continue

						# Add
						self.tLock.acquire()
						self.tSources.append({'url' : None, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						self.tLock.release()
						added = True

						# Link
						thread = threading.Thread(target = self._link, args = (htmlLink, len(self.tSources) - 1))
						threads.append(thread)
						thread.start()
					except:
						pass

				if not added: break
			except:
				break

		# First filter out all non-related links before doing the hash lookup.
		timerTimeout -= 2
		while True:
			if timer.elapsed() > timerTimeout: break
			if not any([thread.is_alive() for thread in threads]): break
			tools.Time.sleep(0.5)

		try: self.tLock.release()
		except: pass
	except:
		try: self.tLock.release()
		except: pass
	return [i for i in self.tSources if i['url']]
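# --- Illustrative sketch (not part of the provider above) ---
# The usenet provider above appends placeholder entries with url = None,
# resolves each link on its own thread, and finally keeps only the entries
# whose url was filled in before the timeout. The skeleton of that pattern,
# where resolve() is a hypothetical stand-in for the provider's _link():
import threading, time

def resolve_all(links, resolve, timeout):
	results = [None] * len(links)
	lock = threading.Lock()
	def worker(index, link):
		value = resolve(link) # Hypothetical: fetches or derives the final URL.
		with lock: results[index] = value
	threads = [threading.Thread(target = worker, args = (i, link)) for i, link in enumerate(links)]
	[thread.start() for thread in threads]
	start = time.time()
	while time.time() - start < timeout and any([thread.is_alive() for thread in threads]):
		time.sleep(0.5)
	return [result for result in results if result] # Discard entries that never resolved.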
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not (self.enabled and self.api and not self.api == ''): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False

		if 'tvshowtitle' in data:
			if pack: query = '%s %d' % (title, season)
			else: query = '%s S%02dE%02d' % (title, season, episode)
		else:
			query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = self.search_link % (urllib.quote_plus(query), self.api)
		url = urlparse.urljoin(self.base_link, url)

		result = json.loads(client.request(url))
		if 'item' in result:
			for item in result['item']:
				jsonName = item['title']
				jsonLink = item['link']
				jsonSize = None
				jsonPassword = False
				jsonAge = None

				if 'newznab:attr' in item:
					for attribute in item['newznab:attr']:
						if 'size' in attribute['_name']:
							jsonSize = int(attribute['_value'])
						elif 'password' in attribute['_name']:
							jsonPassword = int(attribute['_value'])
						elif 'usenetdate' in attribute['_name']:
							jsonAge = attribute['_value']
							jsonAge = jsonAge[jsonAge.find(',') + 2 : jsonAge.find(':') - 3]
							jsonAge = tools.Time.datetime(jsonAge, '%d %b %Y')
							jsonAge = datetime.datetime.today() - jsonAge
							jsonAge = jsonAge.days

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, link = jsonLink, size = jsonSize, age = jsonAge)

				# Ignore
				if meta.ignore(False) or jsonPassword: continue

				# Add
				sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'memberonly' : True, 'source' : 'usenet', 'language' : self.language[0], 'quality': meta.videoQuality(), 'info' : meta.information(), 'file' : jsonName})

		return sources
	except:
		return sources
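# --- Illustrative sketch (not part of the provider above) ---
# The age computation above slices an RFC 2822 date such as
# 'Mon, 02 Jan 2020 15:04:05 +0000' down to '02 Jan 2020' and measures the
# days since. The same logic with only the standard library; age_in_days is
# a hypothetical helper:
import datetime

def age_in_days(usenet_date):
	# Take everything between the comma and the time of day: '02 Jan 2020'.
	date = usenet_date[usenet_date.find(',') + 2 : usenet_date.find(':') - 3]
	then = datetime.datetime.strptime(date, '%d %b %Y')
	return (datetime.datetime.today() - then).days

# Example: age_in_days('Mon, 02 Jan 2020 15:04:05 +0000')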
def _search(self, url, query, show, type, title, titles, year, season, episode, pack, packCount, packException, ignoreContains):
	pageLimit = tools.Settings.getInteger('scraping.providers.pages')
	pageCounter = 0
	page = 0
	added = False
	try:
		while True:
			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			html = BeautifulSoup(client.request(url % (type, query)))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'table-corps')
			if len(htmlTable) > 0:
				htmlTable = htmlTable[0]
				try: htmlTable = htmlTable.find_all('tbody', recursive = False)[0]
				except: pass
				htmlRows = htmlTable.find_all('tr', recursive = False)

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].find_all('a')[0].getText().strip()

					# Link
					htmlLink = urlparse.urljoin(self.base_link, htmlColumns[0].find_all('a')[0].get('href').encode('utf-8'))

					# Size
					# Translate French octet units (eg: Go) to byte units (eg: Gb).
					htmlSize = re.sub('([mMkKgGtT]?)[oO]', '\\1b', htmlColumns[0].find_all('div', class_ = 'poid')[0].getText())
					if not 'b' in htmlSize: htmlSize = htmlSize + ' mb'

					# Seeds
					try: htmlSeeds = int(htmlColumns[0].find_all('div', class_ = 'up')[0].getText().strip())
					except: htmlSeeds = None

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					meta.ignoreAdjust(contains = ignoreContains)
					if meta.ignore(True, season = not packException): continue

					# Add
					self.tLock.acquire()
					self.tSources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					self.tLock.release()
					added = True

					self.tLock.acquire()
					thread = threading.Thread(target = self._link, args = (htmlLink, len(self.tSources) - 1))
					self.tThreadsLinks.append(thread)
					self.tLock.release()
					thread.start()

			# Only a single page is shown.
			break
	except:
		tools.Logger.error()
	finally:
		try: self.tLock.release()
		except: pass
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not (self.enabled and self.api and not self.api == ''): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		imdb = data['imdb'].replace('tt', '') if 'imdb' in data else None
		tvdb = data['tvdb'] if 'tvdb' in data else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False
		if pack: raise Exception() # Currently not supported. Would need a general search.

		url = self.base_link + (self.search_link % (self.api, metadata.Metadata.IgnoreSize))
		if not tvdb == None: url += self.show_link % (tvdb, season, episode)
		else: url += self.movie_link % (imdb)

		result = json.loads(client.request(url))
		for item in result['channel']['item']:
			jsonName = item['title']

			jsonLink = item['link'] # Contains HTML entities, such as &amp;.
			# Add a wrapper to the link, otherwise BeautifulSoup gives a lot of warnings.
			jsonLink = '[BUBBLES]' + jsonLink + '[BUBBLES]'
			jsonLink = BeautifulSoup(jsonLink).contents
			jsonLink = jsonLink[0].replace('[BUBBLES]', '')

			jsonAge = item['pubDate']
			jsonAge = jsonAge[jsonAge.find(',') + 2 : jsonAge.find(':') - 3]
			jsonAge = tools.Time.datetime(jsonAge, '%d %b %Y')
			jsonAge = datetime.datetime.today() - jsonAge
			jsonAge = jsonAge.days

			jsonSize = None
			try:
				for attribute in item['attr']:
					attribute = attribute['@attributes']
					if attribute['name'] == 'size':
						jsonSize = int(attribute['value'])
						break
			except:
				pass

			# Metadata
			meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, pack = pack, link = jsonLink, size = jsonSize, age = jsonAge)

			# Ignore
			if meta.ignore(True): continue

			# Add
			sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'memberonly' : True, 'source' : 'usenet', 'language' : self.language[0], 'quality': meta.videoQuality(), 'info' : meta.information(), 'file' : jsonName})

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s S%02d' % (title, season) # Must add S before the season, otherwise TorrentAPI throws an error (maybe because the search term is too general).
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		# Ensure that only a single token is retrieved when searching for alternative titles.
		# Otherwise an HTTP 429 error is thrown (too many requests).
		first = False
		if source.Lock is None:
			source.Lock = threading.Lock()
			first = True
		source.Lock.acquire()
		if first:
			# Get a token. It expires every 15 minutes, but just request a token on every search. The old token is returned if the previous one has not yet expired.
			url = self.base_link + self.api_link + self.token_link
			result = json.loads(client.request(url))
			source.Token = result['token']
		else:
			tools.Time.sleep(self.rate_limit * 1.1) # There is a 1-request-per-2-seconds limit.

		category = self.category_shows if 'tvshowtitle' in data else self.category_movies
		url = (self.base_link + self.api_link + self.search_link) % (source.Token, urllib.quote_plus(query), category)

		try:
			result = json.loads(client.request(url))
			torrents = result['torrent_results']
			for torrent in torrents:
				jsonName = torrent['title']
				jsonSize = torrent['size']
				jsonLink = torrent['download']
				try: jsonSeeds = int(torrent['seeders'])
				except: jsonSeeds = None

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = jsonLink, size = jsonSize, seeds = jsonSeeds)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(False): continue

				# Add
				sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
		except:
			pass

		source.Lock.release()
	except:
		tools.Logger.error()
		try: source.Lock.release() # Make sure the lock is not left acquired on failure.
		except: pass
	return sources
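# --- Illustrative sketch (not part of the provider above) ---
# The provider above serializes token retrieval across concurrent searches:
# the first caller fetches (or refreshes) the shared token, later callers
# wait out the API's request rate instead. A reduced standalone version of
# that pattern; TokenGate and fetch_token are hypothetical names, and the
# lazy lock creation has the same benign first-use race as the provider code.
import threading, time

class TokenGate(object):
	lock = None
	token = None

	@classmethod
	def acquire(cls, fetch_token, rate = 2):
		first = cls.lock is None
		if first: cls.lock = threading.Lock()
		cls.lock.acquire()
		if first: cls.token = fetch_token() # Refresh. The API returns the old token while it is still valid.
		else: time.sleep(rate * 1.1) # Respect the 1-request-per-`rate`-seconds limit.
		return cls.token
	# The caller must release TokenGate.lock once its request completes.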
def sources(self, url, hostDict, hostprDict):
	self.items = [] # NB: The same provider object is used for both normal episodes and season packs. Make sure the list is cleared from the previous run.
	sources = []
	try:
		if url == None: raise Exception()
		if not debrid.OffCloud().accountValid(): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = 0
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 3
		timer = tools.Time(start = True)

		threads = []
		self.ids = []
		threads.append(threading.Thread(target = self._items, args = (debrid.OffCloud.CategoryCloud, title, year, season, episode, pack)))
		threads.append(threading.Thread(target = self._items, args = (debrid.OffCloud.CategoryInstant, title, year, season, episode, pack)))
		[thread.start() for thread in threads]

		while True:
			if timer.elapsed() > timerEnd: break
			if all([not thread.is_alive() for thread in threads]): break
			time.sleep(0.5)

		try: self.mutex.acquire()
		except: pass
		items = self.items
		try: self.mutex.release()
		except: pass

		for item in items:
			try:
				jsonName = item['video']['name']
				try:
					# Sometimes metadata, like the quality, is only in the folder name, not the file name.
					if not item['name'] == jsonName: jsonName = item['name'] + ' - ' + jsonName
				except: pass
				jsonLink = item['video']['link']
				try: jsonSize = item['size']['bytes']
				except: jsonSize = None

				# Metadata
				meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, size = jsonSize, pack = pack, packCount = packCount)

				# Add
				sources.append({'url' : jsonLink, 'premium' : True, 'debridonly' : True, 'direct' : True, 'memberonly' : True, 'source' : 'OffCloud', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
			except:
				pass

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		year = int(data['year']) if 'year' in data and not data['year'] == None else None
		season = int(data['season']) if 'season' in data and not data['season'] == None else None
		episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
		pack = data['pack'] if 'pack' in data else False

		if 'tvshowtitle' in data:
			if pack: query = '%s %d' % (title, season)
			else: query = '%s S%02dE%02d' % (title, season, episode)
		else:
			query = '%s %d' % (title, year)
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)

		page = 0 # Pages start at 0.
		added = False

		#while True:
		while page == 0: # KickassTorrents currently has a problem viewing any page other than page 1 while sorted by seeders. Only view the first page.
			urlNew = url % (urllib.quote_plus(query))
			html = client.request(urlNew)

			# KickassTorrents has major mistakes in its HTML. Manually remove parts to create new HTML.
			indexStart = html.find('<', html.find('<!-- Start of Loop -->') + 1)
			indexEnd = html.rfind('<!-- End of Loop -->')
			html = html[indexStart : indexEnd]
			html = html.replace('<div class="markeredBlock', '</div><div class="markeredBlock') # The torrentname div tag is not closed.
			html = html.replace('</span></td>', '</td>') # Dangling </span> closing tag.
			html = BeautifulSoup(html)

			page += 1
			added = False

			htmlRows = html.find_all('tr', recursive = False) # Do not search further down the tree (just the direct children).
			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				if 'firstr' in htmlRow['class']: continue # Header row.

				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[0]

				# Name
				htmlName = htmlInfo.find_all('a', class_ = 'cellMainLink')[0].getText().strip()

				# Size
				htmlSize = htmlColumns[1].getText().replace(u'\xa0', ' ') # Replace non-breaking spaces.

				# Link
				htmlLink = ''
				htmlLinks = htmlInfo.find_all('a')
				for j in range(len(htmlLinks)):
					link = htmlLinks[j]
					if link.has_attr('href'):
						link = link['href']
						if link.startswith('magnet:'):
							htmlLink = link
							break

				# Seeds
				htmlSeeds = int(htmlColumns[3].getText())

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'info' : meta.information(), 'file' : htmlName})
				added = True

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not self.emby.enabled(): raise Exception()

		data = self._decode(url)
		type = tools.Media.TypeShow if 'tvshowtitle' in data else tools.Media.TypeMovie
		imdb = data['imdb'] if 'imdb' in data else None

		if 'exact' in data and data['exact']:
			exact = True
			title = data['tvshowtitle'] if type == tools.Media.TypeShow else data['title']
			titles = None
			year = None
			season = None
			episode = None
		else:
			exact = False
			title = data['tvshowtitle'] if type == tools.Media.TypeShow else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None

		if not self._query(title, year, season, episode): return sources

		streams = self.emby.search(type = type, title = title, year = year, season = season, episode = episode, exact = exact)
		if not streams: return sources

		for stream in streams:
			try:
				try: name = stream['file']['name']
				except: name = None
				try: size = stream['file']['size']
				except: size = None

				meta = metadata.Metadata(name = name, title = title, titles = titles, year = year, season = season, episode = episode, size = size)
				meta.setType(metadata.Metadata.TypePremium)
				meta.setDirect(True)

				try: link = stream['stream']['link']
				except: continue
				try: meta.setLink(link)
				except: pass
				try: meta.setName(stream['file']['name'])
				except: pass
				try: meta.setSize(stream['file']['size'])
				except: pass
				try: meta.setVideoQuality(stream['video']['quality'])
				except: pass
				try: meta.setVideoCodec(stream['video']['codec'])
				except: pass
				try: meta.setVideo3D(stream['video']['3d'])
				except: pass
				try: meta.setAudioChannels(stream['audio']['channels'])
				except: pass
				try: meta.setAudioCodec(stream['audio']['codec'])
				except: pass
				try: meta.setAudioLanguages(stream['audio']['languages'])
				except: pass
				try:
					if len(stream['subtitle']['languages']) > 0: meta.setSubtitlesSoft()
				except: pass

				try: source = stream['stream']['source']
				except: source = None
				try: language = stream['audio']['languages'][0]
				except: language = None
				try: quality = stream['video']['quality']
				except: quality = None
				try: filename = stream['file']['name']
				except: filename = None

				sources.append({'url' : link, 'premium' : True, 'direct' : True, 'memberonly' : True, 'source' : source, 'language' : language, 'quality' : quality, 'file' : filename, 'metadata' : meta, 'external' : True})
			except:
				tools.Logger.error()

		return sources
	except:
		tools.Logger.error()
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		pack = None
		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s saison %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = title # Do not include the year, otherwise there are few results.
		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		type = self.type_shows if 'tvshowtitle' in data else self.type_movies
		url = urlparse.urljoin(self.base_link, self.search_link) % (type, urllib.quote_plus(query))

		html = BeautifulSoup(client.request(url))
		htmlTable = html.find_all('table', class_ = 'cust-table')[0].find_all('tbody', recursive = False)[0]
		htmlRows = htmlTable.find_all('tr', recursive = False)

		self.tLock = threading.Lock()
		self.tLinks = [None] * len(htmlRows)
		threads = []
		for i in range(len(htmlRows)):
			urlTorrent = self.base_link + htmlRows[i].find_all('td', recursive = False)[0].find_all('a')[0]['href']
			threads.append(threading.Thread(target = self._link, args = (urlTorrent, i)))
		[thread.start() for thread in threads]

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)
		while timer.elapsed() < timerEnd and any([thread.is_alive() for thread in threads]):
			tools.Time.sleep(0.5)

		self.tLock.acquire() # Just lock, in case the threads are still running.

		for i in range(len(htmlRows)):
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			htmlRow = htmlRows[i]
			htmlColumns = htmlRow.find_all('td', recursive = False)

			# Name
			htmlName = htmlColumns[0].getText().strip()
			if not 'tvshowtitle' in data:
				# Move the year in front of the language tags (eg: 'Title FRENCH 1080p 2009' -> 'Title 2009 FRENCH 1080p').
				htmlName = re.sub(r"^(.*?)(TRUE|TRUEFRENCH|FRENCH|VOSTFR|VO)(.*)([0-9]{4})$", r"\1 \4 \2\3", htmlName)

			# Link
			htmlLink = self.tLinks[i]

			# Size
			htmlSize = htmlColumns[1].getText().strip().lower().replace(' mo', 'MB').replace(' go', 'GB').replace(' o', 'b')

			# Seeds
			try: htmlSeeds = int(htmlColumns[2].getText().strip())
			except: htmlSeeds = None

			# Metadata
			meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

			# Ignore
			if meta.ignore(False): continue

			# Add
			sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})

		self.tLock.release()
		return sources
	except:
		tools.Logger.error()
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					# Only this format works for season packs.
					# Individual episodes are not supported.
					if pack:
						query = '%s S%02d' % (title, season)
					else:
						pack = True
						query = '%s сезон %d' % (title, season)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0 # Pages start at 0.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), page)
			html = client.request(urlNew)

			# There is a quote missing in the HTML.
			# Replace it and add a custom class for easy identification.
			html = html.replace('style="width:1095px; class=" lista">', 'style="width:1095px;" class="gaia lista">')

			htmlLower = html.lower()
			start = htmlLower.index('class="gaia')
			start = htmlLower.index('</tr>', start) + 5
			end = htmlLower.index('</table>', start) + 8
			html = html[start : end]
			html = html.replace('\n', '').replace('\r', '')
			html = html.replace('</TR>', '</tr>')
			htmlRows = html.split('</tr>')

			page += 1
			added = False

			for htmlRow in htmlRows:
				# Link
				try: htmlLink = re.search('(magnet:.*?)>', htmlRow, re.IGNORECASE).group(1)
				except: continue

				# Name
				try: htmlName = ' ' + re.search('details\.php.*?>(.*?)<', htmlRow, re.IGNORECASE).group(1).strip()
				except: htmlName = ''

				# Category
				try: htmlName += ' ' + re.search('border=0\s+alt="(.*?)"', htmlRow, re.IGNORECASE).group(1).strip()
				except: pass

				# Size
				try: htmlSize = re.search('>(\d+\.+\d+ [g|m]b)<', htmlRow, re.IGNORECASE).group(1).strip()
				except: htmlSize = None

				# Seeds
				try: htmlSeeds = int(re.search('>(\d+)<', htmlRow, re.IGNORECASE).group(1).strip())
				except: htmlSeeds = None

				htmlName = re.sub('[^A-Za-z0-9\s]', ' ', htmlName)
				htmlName = re.sub('\s\s+', ' ', htmlName).strip()

				# If 3D appears multiple times in the name, the link would otherwise be ignored.
				# Eg: 3D Avatar 3D 2009 1080p BluRay 3D
				try:
					htmlIndex = htmlName.lower().index('3d')
					htmlName = htmlName.replace('3D', '').replace('3d', '')
					if htmlIndex >= 0: htmlName += ' 3D'
				except: pass

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName, 'pack' : pack})
				added = True

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)
		category = self.category_shows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (category, urllib.quote_plus(query), page)
			html = BeautifulSoup(client.request(urlNew))

			page += 1
			added = False

			# NB: Do not use "tbody class=results", since the table has an inner div/style that breaks parsing.
			htmlRows = html.find_all('tr', class_ = 'result')

			for i in range(len(htmlRows)):
				try:
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].find_all('a')[0].getText().strip()

					# Size
					htmlSize = htmlColumns[1].getText().strip()

					# Link
					htmlLink = ''
					htmlLinks = htmlColumns[0].find_all('a')
					for j in range(len(htmlLinks)):
						link = htmlLinks[j]['href']
						if link.startswith('magnet:'):
							htmlLink = link
							break

					# Seeds
					htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().strip()))

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True
				except:
					pass

			if not added: break # No new links on this page. The last page was reached.

		return sources
	except:
		return sources
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not self.enabled: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			query = '%s S%02dE%02d' % (title, season, episode) if 'tvshowtitle' in data else '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self.streamQuality == None and not self.streamQuality == '' and not self.streamQuality == 'sd':
			query += ' %s' % self.streamQuality
		if not self.streamLanguage == None and not self.streamLanguage == '' and not self.streamLanguage == 'un':
			query += ' lang:%s' % self.streamLanguage
		query = urllib.quote_plus(query)

		hostDict = hostprDict + hostDict

		iterations = self.streamLimit / float(self.streamIncrease)
		if iterations < 1:
			last = self.streamLimit
			iterations = 1
		else:
			difference = iterations - math.floor(iterations)
			last = self.streamIncrease if difference == 0 else int(difference * self.streamIncrease)
			iterations = int(math.ceil(iterations))

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		# Track the API rotation separately, so the final batch size in "last" is not overwritten.
		apiLast = settings.Prontv.apiLast()
		api = settings.Prontv.apiNext()
		apiFirst = apiLast

		for type in self.types:
			for offset in range(iterations):
				# Stop searching 8 seconds before the provider timeout, otherwise the search might continue, not complete in time, and therefore not return any links.
				if timer.elapsed() > timerEnd: break
				if len(sources) >= self.streamLimit: break

				searchCount = last if offset == iterations - 1 else self.streamIncrease
				searchFrom = (offset * self.streamIncrease) + 1

				results = self.retrieve(type, api, query, searchCount, searchFrom)

				try:
					while self.limit(results):
						apiLast = settings.Prontv.apiLast()
						if apiFirst == apiLast: break
						api = settings.Prontv.apiNext()
						results = self.retrieve(type, api, query, searchCount, searchFrom)
					if self.limit(results):
						interface.Dialog.notification(title = 35261, message = interface.Translation.string(33952) + ' (' + str(results['fetchedtoday']) + ' ' + interface.Translation.string(35222) + ')', icon = interface.Dialog.IconWarning)
						tools.Time.sleep(2)
						return sources
				except:
					pass

				results = results['result']
				added = False
				for result in results:
					# Information
					jsonName = result['title']
					jsonSize = result['sizeinternal']
					jsonExtension = result['extension']
					jsonLanguage = result['lang']
					jsonHoster = result['hostername'].lower()
					jsonLink = result['hosterurls'][0]['url']

					# Ignore Hosters
					if not jsonHoster in hostDict: continue

					# Ignore Non-Videos
					# Alluc often has other files, such as SRT, also listed as streams.
					if not jsonExtension == None and not jsonExtension == '' and not tools.Video.extensionValid(jsonExtension): continue

					# Metadata
					meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, link = jsonLink, size = jsonSize)

					# Ignore
					if meta.ignore(False): continue

					# Add
					sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'memberonly' : True, 'source' : jsonHoster, 'language' : jsonLanguage, 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
					added = True

				if not added: break

		return sources
	except:
		return sources
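# --- Illustrative sketch (not part of the provider above) ---
# The paging arithmetic above splits streamLimit into batches of
# streamIncrease, with the final batch carrying the remainder. Worked as a
# standalone function, so the offsets are easy to check; batches is a
# hypothetical helper:
import math

def batches(limit, increase):
	iterations = limit / float(increase)
	if iterations < 1: return [(1, limit)]
	difference = iterations - math.floor(iterations)
	last = increase if difference == 0 else int(difference * increase)
	iterations = int(math.ceil(iterations))
	# (searchFrom, searchCount) per request, with 1-based offsets as in the provider.
	return [((i * increase) + 1, last if i == iterations - 1 else increase) for i in range(iterations)]

# Example: batches(250, 100) == [(1, 100), (101, 100), (201, 50)]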
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			query = '%s S%02dE%02d' % (title, season, episode) if 'tvshowtitle' in data else '%s %d' % (title, year)

		query = urllib.quote_plus(query)

		# The returned website is different from the normal website.
		# Probably a mobile version.
		url = urlparse.urljoin(self.base_link, self.search_link) % query
		html = BeautifulSoup(client.request(url))

		htmlRows = html.find_all('div', class_ = 'yt-lockup-content')
		for htmlRow in htmlRows:
			htmlInfo = htmlRow.find_all('a')[0]

			# Name
			htmlName = htmlInfo.getText().strip()

			# Link
			htmlLink = urlparse.urljoin(self.base_link, htmlInfo['href'])

			# Duration
			# Parse the duration from the row's first span (text like "duration: 1:23:45.").
			htmlDuration = 0
			try:
				htmlDurationItem = htmlRow.find_all('span')[0].getText().lower()
				indexStart = htmlDurationItem.find(':')
				if indexStart > 0:
					indexStart += 1
					indexEnd = htmlDurationItem.find('.', indexStart)
					if indexEnd > 0:
						htmlDuration = htmlDurationItem[indexStart : indexEnd].strip()
						htmlDuration = htmlDuration.split(':')
						if len(htmlDuration) == 3: htmlDuration = (int(htmlDuration[0]) * 3600) + (int(htmlDuration[1]) * 60) + int(htmlDuration[2])
						else: htmlDuration = (int(htmlDuration[0]) * 60) + int(htmlDuration[1])
					else:
						htmlDuration = 0
			except: pass

			# Ignore trailers, etc.
			if any(s in htmlName.lower() for s in self.excludes): continue

			# Ignore videos shorter than 10 minutes.
			if htmlDuration < 600: continue

			# Metadata
			meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, link = htmlLink)

			# Ignore
			if meta.ignore(False): continue

			# Add
			sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'youtube', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})

		return sources
	except:
		return sources
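# ------------------------------------------------------------------
# Standalone sketch of the duration parsing above. The sample string is
# made up in the shape the code expects ("duration: H:MM:SS." or
# "duration: MM:SS."); it converts the clock notation to seconds so
# short clips can be filtered out.
def parseDuration(text):
	seconds = 0
	indexStart = text.find(':')
	if indexStart > 0:
		indexStart += 1
		indexEnd = text.find('.', indexStart)
		if indexEnd > 0:
			parts = text[indexStart : indexEnd].strip().split(':')
			if len(parts) == 3: seconds = (int(parts[0]) * 3600) + (int(parts[1]) * 60) + int(parts[2])
			elif len(parts) == 2: seconds = (int(parts[0]) * 60) + int(parts[1])
	return seconds

# parseDuration('duration: 1:23:45.') -> 5025
# parseDuration('duration: 12:34.')   -> 754
# ------------------------------------------------------------------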
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		pack = None
		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = ['%s %d' % (title, season)]
				else: query = ['%s S%02dE%02d' % (title, season, episode), '%s %02dx%02d' % (title, season, episode)]
			else:
				query = ['%s %d' % (title, year)]

		query = [re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', q) for q in query]

		for q in query:
			url = urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(q)

			html = client.request(url, ignoreSsl = True) # SSL certificate fails.

			# Fix HTML closing tags.
			html = re.sub('<span.*>\s*<\/span>\s*<td', '</td><td', html)
			html = BeautifulSoup(html)

			htmlRows = html.find_all('tr', class_ = ['odd', 'odd2'])
			for i in range(len(htmlRows)):
				try:
					htmlColumns = htmlRows[i].find_all('td', recursive = False)

					# Name
					# The name is abbreviated, use the name in the link instead.
					htmlName = htmlColumns[1].find_all('a')[0]['href']
					htmlName = htmlName[htmlName.rfind('/') + 1:]
					htmlName = htmlName.replace('_', ' ')

					# Link
					htmlLink = htmlColumns[3].find_all('input')[0]['value']
					htmlLink = network.Container(htmlLink).torrentMagnet(title = q, trackers = self.trackers)

					# Size
					htmlSize = htmlColumns[2].getText().strip()

					# Seeds
					try: htmlSeeds = int(htmlColumns[5].getText().strip())
					except: htmlSeeds = None

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)
					meta.mIgnoreLength = 8 # Relax this, otherwise too many links are filtered out (eg: Avatar 2009).

					# Ignore
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName, 'pack' : pack})
				except: pass

		return sources
	except:
		return sources
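# ------------------------------------------------------------------
# Standalone sketch of the markup repair above: the site omits the
# closing </td> tag before the next cell, which makes BeautifulSoup
# nest the cells instead of producing siblings. The HTML below is a
# made-up minimal sample; the regex is the same one the provider uses.
import re
from bs4 import BeautifulSoup

html = '<tr class="odd"><td>first<span class="x"> </span><td>second</td></tr>'
html = re.sub('<span.*>\s*<\/span>\s*<td', '</td><td', html)
cells = [td.getText() for td in BeautifulSoup(html, 'html.parser').find_all('td')]
# After the repair both cells are siblings: [u'first', u'second']
# ------------------------------------------------------------------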
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		query = urllib.quote_plus(query)

		category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1 # Pages start at 1.
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = (self.base_link + self.search_link) % (query, category, page)
			html = BeautifulSoup(client.request(urlNew))

			htmlTable = html.find_all('div', class_ = 'content')[0].find_all('table', class_ = 'table-sm', recursive = False)[1]
			htmlRows = htmlTable.find_all('tr', recursive = False)

			page += 1
			added = False

			for i in range(len(htmlRows)):
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td', recursive = False)

				# Name
				htmlName = htmlColumns[0].getText().strip()

				# Size
				htmlSize = htmlColumns[1].getText().strip()

				# Link
				htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[0]['href'].strip()
				htmlLink = re.search('\/torrent\/(.*)\/', htmlLink, re.IGNORECASE).group(1)
				htmlLink = (self.base_link + self.torrent_link) % htmlLink

				# Seeds
				htmlSeeds = int(htmlColumns[3].getText().strip())

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

				# Ignore
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: break # Last page reached with a working torrent.

		return sources
	except:
		return sources
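# ------------------------------------------------------------------
# Standalone sketch of the pagination pattern shared by the paged
# scrapers in this file: stop when the scrape timeout (minus the 8
# second safety margin) runs out, when the page limit is hit, or when a
# page yields no usable links. fetchPage and parseLinks are
# hypothetical stand-ins for the provider-specific request and parsing.
import time

def paginate(fetchPage, parseLinks, timeout, pageLimit):
	sources = []
	start = time.time()
	timerEnd = timeout - 8
	page = 1
	pageCounter = 0
	while True:
		if time.time() - start > timerEnd: break
		pageCounter += 1
		if pageLimit > 0 and pageCounter > pageLimit: break
		links = parseLinks(fetchPage(page))
		page += 1
		if not links: break # Last page reached.
		sources.extend(links)
	return sources
# ------------------------------------------------------------------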
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources
		query = urllib.quote_plus(query)

		category = self.category_shows if 'tvshowtitle' in data else self.category_movies
		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		# Old implementation, kept for reference. It repaired the missing closing </tr> tags so that BeautifulSoup could detect all the rows.
		'''
		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (category, query, page)
			html = client.request(urlNew)

			# Demonoid does not have a closing tag for the rows.
			# This causes BeautifulSoup to only detect the first row.
			# Manually add a closing </tr> tag, except for the first row.
			html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '<tr align="left" bgcolor="">', 1)
			html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '</tr><tr align="left" bgcolor="#CCCCCC">')
			html = BeautifulSoup(html)

			page += 1
			added = False

			htmlTable = html.find_all('td', class_ = 'ctable_content_no_pad')[0].find_all('table', recursive = False)[1]
			htmlRows = html.find_all('tr')

			i = 0
			while i < len(htmlRows):
				try:
					htmlRow = htmlRows[i]
					i += 1 # Normal loop increment.
					if len(htmlRow.find_all('td', {'rowspan' : '2'})) == 0: continue

					# Name
					htmlName = htmlRow.find_all('td', {'colspan' : '9'})[0].find_all('a')[0].getText().strip()

					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td')
					i += 1 # Go to the next row, because items are split over two lines.

					# Size
					htmlSize = htmlColumns[3].getText().strip()

					# Link
					htmlLink = htmlColumns[2].find_all('a')[0]['href']

					# Seeds
					htmlSeeds = int(htmlColumns[6].getText().strip())

					items = htmlColumns[0].find_all('a')

					# Release
					try:
						htmlRelease = items[1].getText()
						if not 'other' in htmlRelease.lower(): htmlName += ' ' + htmlRelease
					except: pass

					# Language
					try: htmlLanguage = items[2].getText()
					except: htmlLanguage = None

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguage)

					# Ignore
					meta.ignoreAdjust(contains = ignoreContains)
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True
				except: pass
		'''

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (category, query, page)
			html = client.request(urlNew)

			page += 1
			added = False

			# Demonoid has no closing </tr> tags for the listing rows.
			# Instead of repairing the tree, cut out each row between the "tstart" comment and the start of the next row, and parse each fragment separately.
			htmlRows = re.findall('<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">', html, re.M | re.S)
			htmlRows = ['<tr><td>' + i for i in htmlRows]

			for htmlRow in htmlRows:
				try:
					htmlRow = BeautifulSoup(htmlRow)
					htmlColumns = htmlRow.find_all('td')

					# Name
					htmlName = htmlRow.find_all('a')[1].getText().strip()

					# Size
					htmlSize = htmlColumns[4].getText().strip()

					# Link
					htmlLink = htmlRow.find_all('a')[1]['href']
					htmlLink = urlparse.urljoin(self.base_link, htmlLink)
					htmlLink = re.search('genidy=(.*)', htmlLink, re.IGNORECASE)
					if not htmlLink: continue
					htmlLink = self.download_link % htmlLink.group(1)

					# Seeds
					try: htmlSeeds = int(htmlColumns[7].getText().strip())
					except: htmlSeeds = 0

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					meta.ignoreAdjust(contains = ignoreContains)
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True
				except: pass

			if not added: break # Last page reached with a working torrent.

		return sources
	except:
		return sources
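# ------------------------------------------------------------------
# Standalone sketch of the row extraction above. Because the listing
# rows have no closing </tr>, the provider slices the raw HTML between
# the '<!-- tstart -->' marker and the next row opening, and parses
# each fragment on its own. The HTML below is a made-up two-row sample
# (simplified to one link per row, so index 0 is used here where the
# provider uses index 1).
import re
from bs4 import BeautifulSoup

html = ('<tr align="left" bgcolor="#CCCCCC"><!-- tstart -->'
	'<td><a href="/a">First</a></td>'
	'<tr align="left" bgcolor="#CCCCCC"><!-- tstart -->'
	'<td><a href="/b">Second</a></td>'
	'<tr align="left" bgcolor="#CCCCCC">')
rows = re.findall('<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">', html, re.M | re.S)
rows = ['<tr><td>' + i for i in rows]
names = [BeautifulSoup(i, 'html.parser').find_all('a')[0].getText() for i in rows]
# names == [u'First', u'Second']
# ------------------------------------------------------------------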
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()
		if not self.orion.accountEnabled(): raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		type = orionoid.Orionoid.TypeShow if 'tvshowtitle' in data else orionoid.Orionoid.TypeMovie
		imdb = data['imdb'] if 'imdb' in data else None

		if 'exact' in data and data['exact']:
			query = data['tvshowtitle'] if type == orionoid.Orionoid.TypeShow else data['title']
			title = None
			year = None
			season = None
			episode = None
		else:
			query = None
			title = data['tvshowtitle'] if type == orionoid.Orionoid.TypeShow else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None

		streams = self.orion.streamRetrieve(type = type, query = query, imdb = imdb, title = title, year = year, season = season, episode = episode)
		if not streams: return sources

		if type == orionoid.Orionoid.TypeMovie: item = streams['movie']['id']['orion']
		elif type == orionoid.Orionoid.TypeShow: item = streams['episode']['id']['orion']

		streams = streams['streams']
		for stream in streams:
			try:
				# Every attribute is optional, hence the individual try-excepts.
				meta = metadata.Metadata()

				try: meta.setPopularity(stream['popularity']['percent'])
				except: pass
				try: meta.setLink(stream['stream']['link'])
				except: pass
				try:
					if stream['stream']['seeds'] > 0: meta.setSeeds(stream['stream']['seeds'])
				except: pass
				try:
					# For usenet streams, prefer the upload time, otherwise use the time the stream was last updated on Orion.
					if stream['stream']['type'] == orionoid.Orionoid.StreamUsenet and stream['stream']['time'] and stream['stream']['time'] >= 0: age = stream['stream']['time']
					else: age = stream['time']['updated']
					meta.setAge(int(round(max(1, tools.Time.timestamp() - age) / 86400)))
				except: pass
				try: meta.setName(stream['file']['name'])
				except: pass
				try: meta.setSize(stream['file']['size'])
				except: pass
				try: meta.setPack(stream['file']['pack'])
				except: pass
				try: meta.setRelease(stream['meta']['release']) # Assuming setRelease. The original called setName here, which would overwrite the file name set above.
				except: pass
				try: meta.setUploader(stream['meta']['uploader'])
				except: pass
				try: meta.setEdition(stream['meta']['edition'])
				except: pass
				try: meta.setVideoQuality(stream['video']['quality'])
				except: pass
				try: meta.setVideoCodec(stream['video']['codec'])
				except: pass
				try: meta.setVideo3D(stream['video']['3d'])
				except: pass
				try:
					if stream['audio']['type'] == orionoid.Orionoid.AudioDubbed: meta.setAudioDubbed()
				except: pass
				try: meta.setAudioChannels(stream['audio']['channels'])
				except: pass
				try: meta.setAudioCodec(stream['audio']['codec'])
				except: pass
				try: meta.setAudioLanguages(stream['audio']['languages'])
				except: pass
				try:
					if stream['subtitle']['type'] == orionoid.Orionoid.SubtitleSoft: meta.setSubtitlesSoft()
				except: pass
				try:
					if stream['subtitle']['type'] == orionoid.Orionoid.SubtitleHard: meta.setSubtitlesHard()
				except: pass

				try: link = stream['stream']['link']
				except: continue
				try: direct = stream['access']['direct']
				except: direct = False
				meta.setDirect(direct)

				# Only set the cache status if cache inspection is disabled.
				# If cache inspection is enabled, do not use the old/inaccurate values from Orion.
				cache = {}
				if not self.cache:
					try: cache['premiumize'] = stream['access']['premiumize']
					except: pass
					try: cache['offcloud'] = stream['access']['offcloud']
					except: pass
					try: cache['realdebrid'] = stream['access']['realdebrid']
					except: pass

				try:
					if stream['stream']['type'] == orionoid.Orionoid.StreamHoster:
						if stream['stream']['hoster']: source = stream['stream']['hoster']
						else: source = network.Networker.linkDomain(stream['stream']['link'], subdomain = False, ip = False).lower()
						if source:
							if 'gvideo' in source or ('google' in source and 'vid' in source) or ('google' in source and 'link' in source): source = 'GoogleVideo'
							elif 'google' in source and ('usercontent' in source or 'cloud' in source): source = 'GoogleCloud'
							elif 'google' in source and 'doc' in source: source = 'GoogleDocs'
							elif 'google' in source and 'drive' in source: source = 'GoogleDrive'
					else:
						source = stream['stream']['type']
				except:
					source = None
				if not source: source = ''

				try: provider = stream['stream']['source']
				except: provider = None
				try: language = stream['audio']['languages'][0]
				except: language = None
				try: quality = stream['video']['quality']
				except: quality = None
				try: filename = stream['file']['name']
				except: filename = None

				orion = {}
				try: orion['stream'] = stream['id']
				except: pass
				try: orion['item'] = item
				except: pass

				sources.append({'orion' : orion, 'url' : link, 'direct' : direct, 'cache' : cache, 'source' : source, 'provider' : provider, 'language' : language, 'quality' : quality, 'file' : filename, 'metadata' : meta, 'pack' : meta.pack(), 'external' : True})
			except: pass

		return sources
	except:
		tools.Logger.error()
		return sources
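# ------------------------------------------------------------------
# Standalone sketch of the age calculation above: Orion returns an
# epoch timestamp for when a usenet stream was uploaded (or when it was
# last updated), and the provider stores its age as whole days, clamped
# to at least 1. tools.Time.timestamp() is replaced by time.time()
# here, and float division is used for clarity.
import time

def ageDays(uploaded):
	return int(round(max(1, time.time() - uploaded) / 86400.0))

# A stream uploaded 30 days ago:
# ageDays(time.time() - 30 * 86400) -> 30
# ------------------------------------------------------------------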
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			type = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			type = 'tv' if 'tvshowtitle' in data else 'movie'
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				if pack: query = '%s %d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (urllib.quote_plus(query), page)
			html = BeautifulSoup(client.request(urlNew))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'table')[0]
			htmlRows = htmlTable.find_all('td', class_ = 'x-item')

			for i in range(0, len(htmlRows)):
				try:
					htmlRow = htmlRows[i]

					# Name
					htmlName = htmlRow.find_all('a', class_ = 'title')[0]['title'].strip()

					# Size
					htmlSize = htmlRow.find_all('div', class_ = 'tail')[0].getText().replace('\n', '').replace('\r', '').replace('\xa0', ' ').strip() # Replace non-breaking spaces.
					htmlSize = re.search('.*[sS]ize:(.*)[dD]ownloads.*', htmlSize, re.IGNORECASE)
					if htmlSize: htmlSize = htmlSize.group(1).strip()
					else: htmlSize = None

					# Link
					htmlLink = htmlRow.find_all('div', class_ = 'tail')[0].find_all('a', class_ = 'title')[0]['href'].strip()

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = 1)

					# Ignore
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True
				except: pass

			if not added: break # Last page reached with a working torrent.

		return sources
	except:
		return sources
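# ------------------------------------------------------------------
# Standalone sketch of the size extraction above: the 'tail' block is a
# single run of text containing labelled values (the sample string is
# made up in that shape), and the size is pulled out by anchoring on
# the two labels around it.
import re

text = 'Added: 2019-01-01 Size: 1.4 GB Downloads: 12'
match = re.search('.*[sS]ize:(.*)[dD]ownloads.*', text, re.IGNORECASE)
size = match.group(1).strip() if match else None
# size == '1.4 GB'
# ------------------------------------------------------------------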
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		pack = None
		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Only this format works for season packs.
				# Does not support individual episodes.
				if pack:
					query = '%s S%02d' % (title, season)
				else:
					pack = True
					query = '%s сезон %d' % (title, season)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

		url = urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(query)
		html = BeautifulSoup(client.request(url))

		htmlTable = html.find_all('table', class_ = 'tablesorter')[0].find_all('tbody', recursive = False)[0]
		htmlRows = htmlTable.find_all('tr', recursive = False)

		for i in range(len(htmlRows)):
			htmlRow = htmlRows[i]
			htmlColumns = htmlRow.find_all('td')

			# Name
			htmlName = htmlColumns[1].find_all('a')[0].getText().strip()

			# Link
			htmlLink = self.base_link + self.download_link + htmlColumns[2].find_all('a')[0]['href']

			# Size (in bytes)
			htmlSize = long(htmlColumns[3].find_all('u')[0].getText().strip())

			# Seeds
			try: htmlSeeds = int(htmlColumns[4].getText().strip())
			except: htmlSeeds = None

			# Metadata
			meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

			# Ignore
			if meta.ignore(True): continue

			# Add
			sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName, 'pack' : pack})

		return sources
	except:
		return sources
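# ------------------------------------------------------------------
# Standalone sketch of the query encoding above: the site only finds
# season packs through the Russian word for season, so the query mixes
# Latin and Cyrillic text. The show name below is a made-up sample.
# Note that on Python 2 a unicode query with non-ASCII characters must
# be UTF-8 encoded before quote_plus, otherwise quote_plus raises.
import urllib

query = u'%s сезон %d' % (u'Some Show', 2)
encoded = urllib.quote_plus(query.encode('utf-8'))
# encoded == 'Some+Show+%D1%81%D0%B5%D0%B7%D0%BE%D0%BD+2'
# ------------------------------------------------------------------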
def sources(self, url, hostDict, hostprDict):
	sources = []
	try:
		if url == None: raise Exception()

		ignoreContains = None
		data = self._decode(url)

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = None
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			titles = data['alternatives'] if 'alternatives' in data else None
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if 'tvshowtitle' in data:
				# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
				if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
					title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
					query = title
					ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
				else:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		if not self._query(query): return sources

		category = self.category_shows if 'tvshowtitle' in data else self.category_movies
		url = urlparse.urljoin(self.base_link, self.search_link)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 1
		added = False

		timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
		timer = tools.Time(start = True)

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (page, urllib.quote_plus(query), category)

			# For some reason Zooqle returns 404 even though the response has a body.
			# This is probably a bug on Zooqle's server and the error should just be ignored.
			html = BeautifulSoup(client.request(urlNew, ignoreErrors = 404))

			page += 1
			added = False

			htmlTable = html.find_all('table', class_ = 'table-torrents')[0]
			htmlRows = htmlTable.find_all('tr', recursive = False)

			for i in range(1, len(htmlRows)): # First row is the header.
				htmlRow = htmlRows[i]
				htmlColumns = htmlRow.find_all('td')
				htmlInfo = htmlColumns[1]
				htmlMeta = htmlInfo.find_all('div', recursive = False)[0]

				# Name
				htmlName = htmlInfo.find_all('a', recursive = False)[0].getText().strip()

				# Size
				htmlSize = htmlColumns[3].getText()

				# Link
				htmlLink = ''
				htmlLinks = htmlColumns[2].find_all('a')
				for j in range(len(htmlLinks)):
					link = htmlLinks[j]['href']
					if link.startswith('magnet:'):
						htmlLink = link
						break

				# Seeds
				# The seeder count is embedded in a tooltip (eg: "Seeders: X | Leechers: Y").
				htmlSeeds = htmlColumns[5].find_all('div', recursive = False)[0]['title']
				indexStart = htmlSeeds.find(':')
				if indexStart > 0:
					indexStart += 1
					indexEnd = htmlSeeds.find('|', indexStart)
					if indexEnd > 0: htmlSeeds = htmlSeeds[indexStart : indexEnd]
					else: htmlSeeds = htmlSeeds[indexStart:]
					htmlSeeds = int(htmlSeeds.replace(',', '').replace('.', '').strip())
				else:
					htmlSeeds = None

				# Quality & 3D
				try:
					htmlQuality = htmlMeta.find_all('span', class_ = 'hidden-xs')[0].getText().lower().strip()
					if 'ultra' in htmlQuality: htmlQuality = '4K'
					elif 'std' in htmlQuality: htmlQuality = 'SD'
					elif 'med' in htmlQuality or 'low' in htmlQuality: htmlQuality = 'CAM'
					htmlName += ' ' + htmlQuality
				except: pass

				# Audio
				try: htmlName += ' ' + htmlMeta.find_all('span', {'title' : 'Audio format'})[0].getText()
				except: pass

				# Languages
				try: htmlLanguages = htmlMeta.find_all('span', {'title' : 'Detected languages'})[0].getText().split(',')
				except: htmlLanguages = None

				# Metadata
				meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguages)

				# Ignore
				meta.ignoreAdjust(contains = ignoreContains)
				if meta.ignore(True): continue

				# Add
				sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
				added = True

			if not added: break # Last page reached with a working torrent.

		return sources
	except:
		return sources
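# ------------------------------------------------------------------
# Standalone sketch of the seeder parsing above: the counts sit in a
# tooltip attribute roughly of the form 'Seeders: 1,234 | Leechers: 56'
# (made-up sample), so the number is cut out between the colon and the
# pipe, and the thousands separators are stripped before conversion.
def parseSeeds(tooltip):
	indexStart = tooltip.find(':')
	if indexStart < 0: return None
	indexStart += 1
	indexEnd = tooltip.find('|', indexStart)
	value = tooltip[indexStart : indexEnd] if indexEnd > 0 else tooltip[indexStart:]
	return int(value.replace(',', '').replace('.', '').strip())

# parseSeeds('Seeders: 1,234 | Leechers: 56') -> 1234
# ------------------------------------------------------------------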
def sources(self, url, hostDict, hostprDict):
	self.tSources = []
	try:
		if url == None: raise Exception()
		if not self.enabled or self.username == '' or self.password == '': raise Exception()

		data = urlparse.parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

		show = 'tvshowtitle' in data
		title = data['tvshowtitle'] if show else data['title']
		titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if show else '%s (%s)' % (data['title'], data['year'])

		if 'exact' in data and data['exact']:
			query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
			year = None
			season = None
			episode = None
			pack = False
			packCount = None
		else:
			year = int(data['year']) if 'year' in data and not data['year'] == None else None
			season = int(data['season']) if 'season' in data and not data['season'] == None else None
			episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
			pack = data['pack'] if 'pack' in data else False
			packCount = data['packcount'] if 'packcount' in data else None

			if show:
				if pack: query = '%s S%02d' % (title, season)
				else: query = '%s S%02dE%02d' % (title, season, episode)
			else:
				query = '%s %d' % (title, year)

		if show: subcategory = self.subcategories_show.values()[0] if len(self.subcategories_show) == 1 else self.subcategory_any
		else: subcategory = self.subcategories_movie.values()[0] if len(self.subcategories_movie) == 1 else self.subcategory_any

		query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
		querySplit = query.split()

		url = urlparse.urljoin(self.base_link, self.search_link)
		query = urllib.quote_plus(query)

		pageLimit = tools.Settings.getInteger('scraping.providers.pages')
		pageCounter = 0
		page = 0
		added = False

		timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
		timerEnd = timerTimeout - 8
		timer = tools.Time(start = True)

		threads = []
		self.tLock = threading.Lock()

		while True:
			# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
			if timer.elapsed() > timerEnd: break

			pageCounter += 1
			if pageLimit > 0 and pageCounter > pageLimit: break

			urlNew = url % (self.category_video, subcategory, query, page)
			html = BeautifulSoup(client.request(urlNew))

			page += 25 # The page parameter is a result offset (25 results per page).
			added = False

			htmlTables = html.find_all('table', class_ = 'table')
			if htmlTables:
				htmlTable = htmlTables[0]
				htmlTbody = htmlTable.find_all('tbody')[0]
				htmlRows = htmlTbody.find_all('tr', recursive = False)
				for i in range(len(htmlRows)):
					# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
					if timer.elapsed() > timerEnd: break

					htmlRow = htmlRows[i]

					# Name
					htmlInfo = htmlRows[i].find_all('a', href = True)[1]
					htmlName = htmlInfo.getText()

					# Category
					if subcategory is self.subcategory_any:
						htmlCategory = htmlRow.find_all('div', class_ = 'hidden')[0].getText()
						if show and len(self.subcategories_show) > 1:
							if htmlCategory not in self.subcategories_show.keys(): continue
						elif len(self.subcategories_movie) > 1: # Was subcategories_show in the original, assumed to be a typo, since the movie categories are checked here.
							if htmlCategory not in self.subcategories_movie.keys(): continue

					# Size
					# Sizes are listed with French units (octets, eg: Go/Mo/Ko). Rewrite the trailing "o" to "b" so they parse as Gb/Mb/Kb.
					htmlSize = re.sub('([mMkKgGtT]?)[oO]', '\\1b', htmlRow.find_all('td')[5].getText())

					# Link
					htmlLink = self.base_link + self.download_link + str(htmlInfo.get('href').encode('utf-8')).split('/')[-1].split('-')[0]

					# Seeds
					htmlSeeds = int(htmlRow.find_all('td')[7].getText())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True): continue

					# Add
					self.tLock.acquire()
					self.tSources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					self.tLock.release()
					added = True

					# Hash
					if self.inspection:
						htmlHash = urllib.quote(str(htmlInfo.get('href').encode('utf-8')), ':/+')
						thread = threading.Thread(target = self._hash, args = (htmlHash, len(self.tSources) - 1))
						threads.append(thread)
						thread.start()

			if not added: break # Last page reached with a working torrent.

		# First filter out all non-related links before doing the hash lookup.
		if self.inspection:
			timerTimeout -= 2
			while True:
				if timer.elapsed() > timerTimeout: break
				if not any([thread.is_alive() for thread in threads]): break
				tools.Time.sleep(0.3)

		try: self.tLock.release()
		except: pass

		return self.tSources
	except:
		tools.Logger.error()
		try: self.tLock.release()
		except: pass
		return self.tSources
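# ------------------------------------------------------------------
# Standalone sketch of the size normalisation above: the site reports
# sizes in French units (octets), eg: '1.4 Go' or '700 Mo' (made-up
# samples), and the regex rewrites the trailing 'o' to 'b' so the
# shared metadata code can parse the usual Gb/Mb/Kb notation.
import re

def normaliseSize(size):
	return re.sub('([mMkKgGtT]?)[oO]', '\\1b', size)

# normaliseSize('1.4 Go') -> '1.4 Gb'
# normaliseSize('700 Mo') -> '700 Mb'
# ------------------------------------------------------------------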