    def _retrieveInv(self, query, required=None):
        timeout = self.timeoutTotal / 2
        timeoutThread = timeout - 2
        threads = []

        if required:
            indexes = self._indexes(required, self.inv_split)
            self.inv_count = 0
            self.inv_total = len(indexes)
            for i in indexes:
                threads.append(
                    threading.Thread(target=self._fetchInv,
                                     args=(i, required, timeoutThread, True)))

        indexes = self._indexes(query, self.inv_split)
        for i in indexes:
            threads.append(
                threading.Thread(target=self._fetchInv,
                                 args=(i, query, timeoutThread, False)))
        [i.start() for i in threads]

        tools.Time.sleep(0.5)
        timer = tools.Time(start=True)
        while timer.elapsed() < timeout and any(i.is_alive() for i in threads):
            tools.Time.sleep(0.5)
        self.timeoutCurrent = timer.elapsed()

        # For short titles, like "V for Vendetta".
        # Do not search for the required keywords by default, since these searches often return a large number of links, making the query extremely slow.
        if len(query) < self.thresholdWords:
            self.inv_items.extend(self.inv_required)
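
The _retrieveInv helper above (and _retrieveInx further down) follow the same pattern: split the work into index ranges, start one thread per range, then poll every half second until all threads finish or the time budget runs out. A minimal standalone sketch of that pattern, using plain time.time() in place of tools.Time (all names below are illustrative, not part of the add-on):

import threading
import time

def run_with_budget(work_items, worker, timeout):
    # Give each worker slightly less time than the overall budget,
    # mirroring the "timeoutThread = timeout - 2" above.
    thread_timeout = max(timeout - 2, 1)
    threads = [threading.Thread(target=worker, args=(item, thread_timeout))
               for item in work_items]
    for thread in threads:
        thread.start()

    started = time.time()
    # Poll instead of join() so the overall budget is enforced even if a worker hangs.
    while time.time() - started < timeout and any(t.is_alive() for t in threads):
        time.sleep(0.5)
    return time.time() - started  # elapsed time, analogous to self.timeoutCurrent

def _fetch(item, timeout):
    time.sleep(0.1)  # placeholder for a network request that honours `timeout`

if __name__ == '__main__':
    print('finished after %.1fs' % run_with_budget(range(5), _fetch, timeout=10))
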
Example #2
    def _items(self, category, title, titles, year, season, episode, pack):
        try:
            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 3
            timer = tools.Time(start=True)

            items = offcloud.Core().items(category=category)
            try:
                self.mutex.acquire()
            except:
                pass

            threads = []
            for item in items:
                if item['status'] == offcloud.Core.StatusFinished:  # Only finished downloads.
                    id = item['id']
                    if not id in self.ids:
                        meta = metadata.Metadata(name=item['name'],
                                                 title=title,
                                                 titles=titles,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack)
                        if not meta.ignore(size=False):
                            self.ids.append(id)
                            if category == offcloud.Core.CategoryInstant:
                                self.items.append(item)
                            else:
                                threads.append(
                                    threading.Thread(target=self._item,
                                                     args=(category, id,
                                                           season, episode)))

            try:
                self.mutex.release()
            except:
                pass

            if len(threads) > 0:
                [thread.start() for thread in threads]

                while True:
                    if timer.elapsed() > timerEnd:
                        break
                    if all([not thread.is_alive() for thread in threads]):
                        break
                    time.sleep(0.5)
        except:
            tools.Logger.error()
    def _retrieveInx(self, query):
        timeout = self.timeoutTotal - self.timeoutCurrent
        timeoutThread = timeout - 2
        threads = []

        indexes = self._indexes(self.inv_items, self.inx_split)
        for i in indexes:
            threads.append(
                threading.Thread(target=self._fetchInx,
                                 args=(i, query, timeoutThread)))
        [i.start() for i in threads]

        tools.Time.sleep(0.5)
        timer = tools.Time(start=True)
        while timer.elapsed() < timeout and any(i.is_alive() for i in threads):
            tools.Time.sleep(0.5)
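
The _items method above wraps self.mutex.acquire() and release() in bare try/except blocks so the code still runs when no mutex was set up. A small sketch of one way to express the same intent with a context manager (the helper name and usage are assumptions, not the add-on's API):

import threading
from contextlib import contextmanager

@contextmanager
def optional_lock(lock):
    # Acquire the lock if one was provided; otherwise run the body unlocked.
    if lock is None:
        yield
        return
    lock.acquire()
    try:
        yield
    finally:
        lock.release()  # always released, even if the body raises

# Usage: the body executes whether or not a mutex exists.
ids = []
mutex = threading.Lock()  # or None when no locking is needed
with optional_lock(mutex):
    ids.append('example-id')
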
Example #4
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None: raise Exception()

			ignoreContains = None
			data = self._decode(url)

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = None
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = data['alternatives'] if 'alternatives' in data else None
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
					if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
						title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
						query = title
						ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
					else:
						if pack: query = '%s %d' % (title, season)
						else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			if not self._query(query): return sources

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = (self.base_link + self.search_link) % (query, page)
				html = BeautifulSoup(client.request(urlNew))
				htmlTable = html.find_all('table', class_ = 'search-table')[0]
				htmlRows = htmlTable.find_all('tr', recursive = False)

				page += 1
				added = False

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].getText().strip()

					# Size
					htmlSize = htmlColumns[2].getText().strip()

					# Link
					htmlLink = htmlColumns[0].find_all('a')[0]['href'].strip()
					htmlLink = network.Container(htmlLink).torrentMagnet(title = title)

					# Seeds
					htmlSeeds = int(htmlColumns[3].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					meta.ignoreAdjust(contains = ignoreContains)
					if meta.ignore(True): continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
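
The sources() implementations in this listing all build their search string the same way: 'title SxxExx' for single episodes, 'title season' for season packs, 'title year' for movies, then replace troublesome punctuation with spaces and URL-encode the result. A compact sketch of just that step (the function name and defaults are illustrative):

import re
try:
    from urllib import quote_plus        # Python 2, as used in the examples
except ImportError:
    from urllib.parse import quote_plus  # Python 3 fallback

def build_query(title, year=None, season=None, episode=None, pack=False):
    if season is not None:
        # Season packs search by season number, single episodes by SxxExx.
        query = '%s %d' % (title, season) if pack else '%s S%02dE%02d' % (title, season, episode)
    else:
        query = '%s %d' % (title, year)
    # Strip characters that commonly break provider searches, as in the re.sub calls above.
    query = re.sub('(\\\\|/| -|:|;|\\*|\\?|"|\'|<|>|\\|)', ' ', query)
    return quote_plus(query)

# build_query('V for Vendetta', year=2005)       -> 'V+for+Vendetta+2005'
# build_query('Some Show', season=2, episode=5)  -> 'Some+Show+S02E05'
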
Example #5
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                    else:
                        if pack: query = '%s %d' % (title, season)
                        else:
                            query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources
            query = urllib.quote_plus(query)

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)
            '''
			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (category, query, page)
				html = client.request(urlNew)

				# Demonoid does not have a closing tag for the rows.
				# This causes BeautifulSoup to only detect the first row.
				# Manually add a closing </tr> tag, except for the first row.
				html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '<tr align="left" bgcolor="">', 1)
				html = html.replace('<tr align="left" bgcolor="#CCCCCC">', '</tr><tr align="left" bgcolor="#CCCCCC">')

				html = BeautifulSoup(html)

				page += 1
				added = False

				htmlTable = html.find_all('td', class_ = 'ctable_content_no_pad')[0].find_all('table', recursive = False)[1]
				htmlRows = html.find_all('tr')

				i = 0
				while i < len(htmlRows):
					try:
						htmlRow = htmlRows[i]
						i += 1 # Normal loop increment.

						if len(htmlRow.find_all('td', {'rowspan' : '2'})) == 0:
							continue

						# Name
						htmlName = htmlRow.find_all('td', {'colspan' : '9'})[0].find_all('a')[0].getText().strip()

						htmlRow = htmlRows[i]
						i += 1 # Go to the next row, because items are split over two lines.

						# Size
						htmlSize = htmlColumns[3].getText().strip()

						# Link
						htmlLink = htmlColumns[2].find_all('a')[0]['href']

						# Seeds
						htmlSeeds = int(htmlColumns[6].getText().strip())

						items = htmlColumns[0].find_all('a')

						# Release
						try:
							htmlRelease = items[1].getText()
							if not 'other' in htmlRelease.lower(): htmlName += ' ' + htmlRelease
						except:
							pass

						# Language
						try:
							htmlLanguage = items[2].getText()
						except:
							htmlLanguage = None

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds, languageAudio = htmlLanguage)

						# Ignore
						meta.ignoreAdjust(contains = ignoreContains)
						if meta.ignore(True): continue

						# Add
						sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						added = True
					except:
						pass
			'''

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (category, query, page)
                html = client.request(urlNew)

                page += 1
                added = False

                htmlRows = re.findall(
                    '<!--\s*tstart\s*-->(.*?)<tr\s*align="left"\s*bgcolor="#CCCCCC">',
                    html, re.M | re.S)
                htmlRows = ['<tr><td>' + i for i in htmlRows]
                for htmlRow in htmlRows:
                    try:
                        htmlRow = BeautifulSoup(htmlRow)
                        htmlColumns = htmlRow.find_all('td')

                        # Name
                        htmlName = htmlRow.find_all('a')[1].getText().strip()

                        # Size
                        htmlSize = htmlColumns[4].getText().strip()

                        # Link
                        htmlLink = htmlRow.find_all('a')[1]['href']
                        htmlLink = urlparse.urljoin(self.base_link, htmlLink)
                        htmlLink = re.search('genidy=(.*)', htmlLink,
                                             re.IGNORECASE)
                        if not htmlLink: continue
                        htmlLink = self.download_link % htmlLink.group(1)

                        # Seeds
                        try:
                            htmlSeeds = int(htmlColumns[7].getText().strip())
                        except:
                            htmlSeeds = 0

                        items = htmlColumns[0].find_all('a')

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 titles=titles,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)

                        # Ignore
                        meta.ignoreAdjust(contains=ignoreContains)
                        if meta.ignore(True): continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
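
Each provider pages through its results with the same control flow: bail out a few seconds before the global scraping timeout, respect the configured page limit, and treat a page that adds nothing usable as the last page. A bare-bones sketch of that loop, with fetch_page and parse_rows as placeholder callables (not part of the add-on):

import time

def scrape_pages(fetch_page, parse_rows, timeout=30, page_limit=0, safety_margin=8):
    sources = []
    page = 1
    started = time.time()
    while True:
        # Stop before the provider timeout so partial results can still be returned.
        if time.time() - started > timeout - safety_margin:
            break
        if page_limit > 0 and page > page_limit:
            break
        rows = parse_rows(fetch_page(page))
        page += 1
        added = False
        for row in rows:
            sources.append(row)
            added = True
        if not added:  # An empty page is treated as the last page.
            break
    return sources

# Example: two pages of fake results, then an empty page ends the loop.
# scrape_pages(lambda p: ['row'] if p <= 2 else [], lambda rows: rows)
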
Example #6
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1  # Pages start at 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (category, urllib.quote_plus(query), page)
                html = client.request(urlNew)

                # HTML is corrupt. Try to fix it manually.
                try:
                    indexStart = html.find('class="table2"')
                    indexStart = html.find('<tr bgcolor', indexStart)
                    indexEnd = html.find('search_stat', indexStart)
                    html = html[indexStart:indexEnd]
                    indexEnd = html.rfind('</td>') + 5
                    html = html[:indexEnd]
                    html = html.replace('</a></td>', '</td>')
                    html = '<table>' + html + '</tr></table>'
                except:
                    pass

                html = BeautifulSoup(html)

                page += 1
                added = False

                htmlRows = html.find_all(
                    'tr'
                )  # Do not search further down the tree (just the direct children), because that will also retrieve the header row.
                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[0].find_all('div')[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', recursive=False)[1].getText().strip()

                    # Link
                    htmlHash = htmlInfo.find_all('a',
                                                 recursive=False)[0]['href']
                    indexStart = htmlHash.find('torrent/')
                    if indexStart < 0: continue
                    indexStart += 8
                    indexEnd = htmlHash.find('.torrent', indexStart)
                    if indexEnd < 0: continue
                    htmlHash = htmlHash[indexStart:indexEnd]
                    if not tools.Hash.valid(htmlHash): continue
                    htmlLink = network.Container(htmlHash).torrentMagnet(
                        title=query)

                    # Size
                    htmlSize = htmlColumns[2].getText().strip()

                    # Seeds
                    htmlSeeds = int(htmlColumns[3].getText().replace(
                        ',', '').replace(' ', ''))

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
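
The example above pulls a 40-character info hash out of the .torrent URL, validates it with tools.Hash.valid, and passes it to network.Container(...).torrentMagnet to get a magnet link. Those helpers belong to the add-on; a rough standalone equivalent of what such a conversion presumably produces (an assumption about the helper, not its actual implementation) looks like this:

import re
try:
    from urllib import quote_plus
except ImportError:
    from urllib.parse import quote_plus

def to_magnet(info_hash, title=None):
    # Standard magnet URI for a BitTorrent v1 info hash (40 hex characters).
    if not re.match('^[a-fA-F0-9]{40}$', info_hash):
        raise ValueError('not a valid info hash')
    magnet = 'magnet:?xt=urn:btih:' + info_hash.lower()
    if title:
        magnet += '&dn=' + quote_plus(title)  # optional display name
    return magnet

# to_magnet('0123456789abcdef0123456789abcdef01234567', 'Example')
# -> 'magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&dn=Example'
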
Example #7
	def sources(self, url, hostDict, hostprDict):
		self.tSources = []
		try:
			if url == None: raise Exception()

			ignoreContains = None
			data = self._decode(url)

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = None
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				titles = data['alternatives'] if 'alternatives' in data else None
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					# Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
					if (season == 0 or episode == 0) and ('title' in data and not data['title'] == None and not data['title'] == ''):
						title = '%s %s' % (data['tvshowtitle'], data['title']) # Change the title for metadata filtering.
						query = title
						ignoreContains = len(data['title']) / float(len(title)) # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
					else:
						if pack: query = '%s %d' % (title, season)
						else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			if not self._query(query): return self.tSources
			
			url = urlparse.urljoin(self.base_link, self.search_link)

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0 # Page starts at 1, but incremented before first request.

			timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
			timerEnd = timerTimeout - 8
			timer = tools.Time(start = True)

			threads = []
			self.tLock = threading.Lock()

			while True:
				try:
					# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
					if timer.elapsed() > timerEnd: break

					added = False
					pageCounter += 1
					if pageLimit > 0 and pageCounter > pageLimit: break

					html = BeautifulSoup(client.request(url % (query, pageCounter)))
					htmlTable = html.find_all('table', class_ = 'results')
					htmlTable = htmlTable[len(htmlTable) - 1]
					htmlRows = htmlTable.find_all('tr')

					for i in range(1, len(htmlRows)):
						try:
							htmlRow = htmlRows[i]
							htmlColumns = htmlRow.find_all('td', recursive = False) # Use children and no further.

							# Name
							htmlName = htmlColumns[0].find_all('a')[0].getText()

							# Link
							htmlLink = urlparse.urljoin(self.base_link, htmlColumns[0].find_all('a')[0]['href'])

							# Size
							htmlSize = htmlColumns[1].getText()

							# Age
							htmlAge = htmlColumns[3].getText()
							htmlAge = int(convert.ConverterDuration(htmlAge).value(convert.ConverterDuration.UnitDay))

							# Metadata
							meta = metadata.Metadata(name = htmlName, title = title, titles = titles, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, age = htmlAge)

							# Ignore
							meta.ignoreAdjust(contains = ignoreContains, length = 0.3)
							if meta.ignore(False): continue

							# Add
							self.tLock.acquire()
							self.tSources.append({'url' : None, 'debridonly' : False, 'direct' : False, 'source' : 'usenet', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
							self.tLock.release()
							added = True

							# Link
							thread = threading.Thread(target = self._link, args = (htmlLink, len(self.tSources) - 1))
							threads.append(thread)
							thread.start()

						except:
							pass

					if not added: break
				except:
					break

			# First filter out all non-related links before doing the hash lookup.
			timerTimeout -= 2
			while True:
				if timer.elapsed() > timerTimeout: break
				if not any([thread.is_alive() for thread in threads]): break
				tools.Time.sleep(0.5)

			try: self.tLock.release()
			except: pass
		except:
			try: self.tLock.release()
			except: pass

		return [i for i in self.tSources if i['url']]
Example #8
	def sources(self, url, hostDict, hostprDict):
		self.tSources = []
		try:
			if url == None:
				raise Exception()

			if not self.enabled or self.username == '' or self.password == '':
				raise Exception()

			data = urlparse.parse_qs(url)

			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			show = 'tvshowtitle' in data
			title = data['tvshowtitle'] if show else data['title']
			titleYear = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if show else '%s (%s)' % (data['title'], data['year'])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if show: subcategory = self.subcategories_show.values()[0] if len(self.subcategories_show) == 1 else self.subcategory_any
				else: subcategory = self.subcategories_movie.values()[0] if len(self.subcategories_movie) == 1 else self.subcategory_any

				if show:
					if pack: query = '%s S%02d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
				querySplit = query.split()

			url = urlparse.urljoin(self.base_link, self.search_link)
			query = urllib.quote_plus(query)

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 0
			added = False

			timerTimeout = tools.Settings.getInteger('scraping.providers.timeout')
			timerEnd = timerTimeout - 8
			timer = tools.Time(start = True)

			threads = []
			self.tLock = threading.Lock()
			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (self.category_video, subcategory, query, page)
				html = BeautifulSoup(client.request(urlNew))

				page += 25
				added = False

				htmlTables = html.find_all('table', class_ = 'table')
				if htmlTables:
					htmlTable = htmlTables[0]
					htmlTbody = htmlTable.find_all('tbody')[0]
					htmlRows = htmlTbody.find_all('tr', recursive = False)

					for i in range(len(htmlRows)):
						# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
						if timer.elapsed() > timerEnd:
							break

						htmlRow = htmlRows[i]

						# Name
						htmlInfo = htmlRows[i].find_all('a', href = True)[1]
						htmlName = htmlInfo.getText()

						# Category
						if subcategory is self.subcategory_any:
							htmlCategory = htmlRow.find_all('div', class_ = 'hidden')[0].getText()
							if show and len(self.subcategories_show) > 1:
								if htmlCategory not in self.subcategories_show.keys():
									continue
							elif len(self.subcategories_movie) > 1:
								if htmlCategory not in self.subcategories_movie.keys():
									continue

						# Size
						htmlSize = re.sub('([mMkKgGtT]?)[oO]', '\\1b', htmlRow.find_all('td')[5].getText())

						# Link
						htmlLink = self.base_link + self.download_link + str(htmlInfo.get('href').encode('utf-8')).split('/')[-1].split('-')[0]

						# Seeds
						htmlSeeds = int(htmlRow.find_all('td')[7].getText())

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)
						
						# Ignore
						if meta.ignore(True):
							continue

						# Add
						self.tLock.acquire()
						self.tSources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						self.tLock.release()
						added = True

						# Hash
						if self.inspection:
							htmlHash = urllib.quote(str(htmlInfo.get('href').encode('utf-8')), ':/+')
							thread = threading.Thread(target = self._hash, args = (htmlHash, len(self.tSources) - 1))
							threads.append(thread)
							thread.start()

				if not added: # Last page reached with a working torrent
					break

			# First filter out all non-related links before doing the hash lookup.
			if self.inspection:
				timerTimeout -= 2
				while True:
					if timer.elapsed() > timerTimeout: break
					if not any([thread.is_alive() for thread in threads]): break
					tools.Time.sleep(0.3)

			try: self.tLock.release()
			except: pass

			return self.tSources
		except:
			tools.Logger.error()
			try: self.tLock.release()
			except: pass
			return self.tSources
Example #9
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)
			category = self.category_shows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (category, urllib.quote_plus(query), page)
				html = BeautifulSoup(client.request(urlNew))

				page += 1
				added = False

				# NB: Do not use "tbody class=results", since the table has inner div/style that breaks parsing.
				htmlRows = html.find_all('tr', class_ = 'result') # Do not search further down the tree (just the direct children), because that will also retrieve the header row.
				for i in range(len(htmlRows)):
					try:
						htmlRow = htmlRows[i]
						htmlColumns = htmlRow.find_all('td', recursive = False)

						# Name
						htmlName = htmlColumns[0].find_all('a')[0].getText().strip()

						# Size
						htmlSize = htmlColumns[1].getText().strip()

						# Link
						htmlLink = ''
						htmlLinks = htmlColumns[0].find_all('a')
						for j in range(len(htmlLinks)):
							link = htmlLinks[j]['href']
							if link.startswith('magnet:'):
								htmlLink = link
								break

						# Seeds
						htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().strip()))

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

						# Ignore
						if meta.ignore(True):
							continue

						# Add
						sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						added = True
					except:
						pass

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
Example #10
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            type = self.type_tvshows if (
                'tvshowtitle' in data and not data['tvshowtitle'] == None
                and not data['tvshowtitle'] == '') else self.type_movies
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            titleYear = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else '%s (%s)' % (data['title'],
                                                         data['year'])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(
                data['year']
            ) if 'year' in data and not data['year'] == None else None
            season = int(
                data['season']
            ) if 'season' in data and not data['season'] == None else None
            episode = int(
                data['episode']
            ) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            url = urlparse.urljoin(self.base_link, self.search_link)

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                urlNew = url % (urllib.quote_plus(query), type, page)
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('div', id='div2child')[0]
                htmlRows = htmlTable.find_all(
                    'div', class_='resultdiv', recursive=False
                )  # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlInfo = htmlRow.find_all('div',
                                                class_='resultdivbotton')[0]

                    # Name
                    htmlName = htmlRow.find_all(
                        'div', class_='resultdivtop')[0].find_all(
                            'div',
                            class_='resultdivtopname')[0].getText().strip()

                    # Size
                    htmlSize = htmlInfo.find_all(
                        'div', class_='resultlength')[0].find_all(
                            'div',
                            class_='resultdivbottonlength')[0].getText()

                    # Link
                    htmlHash = htmlInfo.find_all(
                        'div', class_='hideinfohash')[0].getText()
                    htmlLink = network.Container(htmlHash).torrentMagnet(
                        title=titleYear)

                    # Seeds
                    htmlSeeds = int(
                        htmlInfo.find_all(
                            'div', class_='resultseed')[0].find_all(
                                'div',
                                class_='resultdivbottonseed')[0].getText())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'info': meta.information(),
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
Example #11
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			url = urlparse.urljoin(self.base_link, self.search_link)
			category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies
			url += category

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 0 # Pages start at 0
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = url % (urllib.quote_plus(query), page)
				html = client.request(urlNew)

				htmlLower = html.lower()
				start = htmlLower.index('<img')
				end = htmlLower.index('>', start) + 1
				html = html[:start] + html[end:]
				html = html.replace('</b></a><td', '</b></a></td><td')

				html = html.replace('<shn>', '').replace('</shn>', '')
				html = html.replace('<shnn>', '').replace('</shnn>', '')
				html = html.replace('<shn2>', '').replace('</shn2>', '')

				html = BeautifulSoup(html)

				page += 1
				added = False

				htmlRows = html.find_all('tr', class_ = 't-row') # Missing closing tags. Look for rows directly instead.
				for i in range(len(htmlRows)):
					try:
						htmlRow = htmlRows[i]
						htmlColumns = htmlRow.find_all('th')

						# Name
						htmlName = htmlRow.find_all('td', recursive = False)[0].getText().strip()

						# Size
						htmlSize = htmlColumns[2].getText().strip()

						# Link
						htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[1]['href'].strip()
						htmlLink = urlparse.urljoin(self.base_link, htmlLink)

						# Seeds
						htmlSeeds = int(re.sub('[^0-9]', '', htmlColumns[4].getText().replace(',', '').replace('.', '').strip()))

						# Metadata
						meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

						# Ignore
						if meta.ignore(True):
							continue

						# Add
						sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality': meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
						added = True
					except:
						pass

				if not added: # Last page reached with a working torrent
					break

			return sources
		except:
			return sources
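
Several providers in this listing (this one and the earlier "HTML is corrupt" example) patch the site's broken markup with plain string operations before handing it to BeautifulSoup. The idea in isolation, with made-up marker strings purely for illustration:

from bs4 import BeautifulSoup

def parse_broken_table(html):
    # Slice out just the region of interest so unrelated broken markup cannot
    # confuse the parser. The marker strings below are illustrative only.
    start = html.find('<table class="results"')
    end = html.find('</table>', start)
    if start < 0 or end < 0:
        return []
    fragment = html[start:end + len('</table>')]
    # Example repair: close rows that the site never closes itself.
    fragment = fragment.replace('<tr class="row">', '</tr><tr class="row">')
    return BeautifulSoup(fragment, 'html.parser').find_all('tr')
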
Example #12
    def sources(self, url, hostDict, hostprDict):
        self.items = [
        ]  # NB: The same object of the provider is used for both normal episodes and season packs. Make sure it is cleared from the previous run.
        sources = []
        try:
            if url == None: raise Exception()

            core = premiumize.Core()
            if not core.accountValid(): raise Exception()

            data = self._decode(url)

            if 'exact' in data and data['exact']:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False

            if not self._query(title, year, season, episode, pack):
                return sources

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 3
            timer = tools.Time(start=True)

            threads = []
            ids = []
            items = core._items()
            for item in items:
                id = item['id']
                if not id in ids:
                    # The RSS feed directory returns the same episodes individually and as a pack. Only add it once.
                    meta = metadata.Metadata(name=item['name'],
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode)
                    if ((not pack and item['name'] == source.FeedsName)
                            or not pack) and not meta.ignore(size=False):
                        if item['type'] == 'file':
                            item['video'] = item
                            self.items.append(item)
                        else:
                            threads.append(
                                threading.Thread(target=self._item,
                                                 args=(item['id'], None,
                                                       season, episode)))

            [thread.start() for thread in threads]

            while True:
                if timer.elapsed() > timerEnd:
                    break
                if all([not thread.is_alive() for thread in threads]):
                    break
                time.sleep(0.5)

            try:
                self.mutex.acquire()
            except:
                pass
            items = self.items
            try:
                self.mutex.release()
            except:
                pass

            for item in items:
                try:
                    jsonName = item['video']['name']
                    try:
                        if not item['name'] == jsonName and not item[
                                'name'] == 'root':
                            jsonName = item[
                                'name'] + ' - ' + jsonName  # Sometimes metadata, like quality, is only in the folder name, not the file name.
                    except:
                        pass

                    jsonLink = item['video']['link']
                    jsonSize = item['video']['size']['bytes']

                    # RAR Files
                    if jsonLink.lower().endswith('.rar'):
                        continue

                    # Metadata
                    meta = metadata.Metadata(name=jsonName,
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             size=jsonSize,
                                             pack=pack)

                    # Add
                    sources.append({
                        'url': jsonLink,
                        'premium': True,
                        'debridonly': True,
                        'direct': True,
                        'memberonly': True,
                        'source': 'Premiumize',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': jsonName
                    })
                except:
                    pass
            return sources
        except:
            return sources
Example #13
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            titleYear = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']), int(data['episode'])
            ) if 'tvshowtitle' in data else '%s (%s)' % (data['title'],
                                                         data['year'])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None
                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise might continue searching, not complete in time, and therefore not returning any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (query, page)  # Do not plus/quote query.
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlRows = html.find_all('div', class_='rs')
                for i in range(len(htmlRows)):
                    try:
                        htmlRow = htmlRows[i]
                        htmlInfo = htmlRow.find_all('div', class_='sbar')[0]

                        # Name
                        htmlName = htmlRow.find_all(
                            'div', class_='title')[0].getText().strip()

                        # Size
                        htmlSize = htmlInfo.find_all('span')[3].find_all(
                            'b')[0].getText().strip()

                        # Link
                        htmlLink = None
                        for j in htmlInfo.find_all('a'):
                            if network.Container(j['href']).torrentIsMagnet():
                                htmlLink = j['href']
                                break

                        # Seeds
                        # No seeds, estimate with popularity.
                        try:
                            htmlSeeds = int(
                                htmlInfo.find_all('span')[5].find_all('b')
                                [0].getText().strip())
                            htmlSeeds /= 30000
                            htmlSeeds = max(1, htmlSeeds)
                        except:
                            htmlSeeds = 1

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #14
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                type = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                type = 'tv' if 'tvshowtitle' in data else 'movie'
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            query = query.replace(' ', '-')  # Uses - not + to separate words.
            query = query.lower()  # Only lower case letters work.
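            # e.g. 'Game of Thrones S01E01' becomes 'game-of-thrones-s01e01'.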

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                # Entries are alphabetically categorized according to their first letter.
                urlNew = url % (query[0], urllib.quote_plus(query), page)
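                # e.g. url % ('g', 'game-of-thrones-s01e01', 1) for the query built above.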
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='download')[0]
                htmlRows = htmlTable.find_all(
                    'tbody', recursive=False)[0].find_all('tr',
                                                          recursive=False)
                for i in range(0, len(htmlRows)):
                    try:
                        htmlRow = htmlRows[i]
                        htmlColumns = htmlRow.find_all('td', recursive=False)

                        # Type
                        if type and not htmlColumns[3].getText().strip().lower(
                        ) == type:
                            continue

                        # Name
                        htmlName = htmlColumns[1].find_all(
                            'a', recursive=False)[0].getText().strip()

                        # Size
                        htmlSize = htmlColumns[5].getText().strip()

                        # Link
                        htmlLink = htmlColumns[0].find_all(
                            'a', recursive=False)[0]['href'].strip()

                        # Seeds
                        htmlSeeds = int(htmlColumns[6].getText().strip())

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=htmlSeeds)

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #15
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are matched as well.
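                        # e.g. for tvshowtitle 'Doctor Who' and special title 'Blink', title becomes
                        # 'Doctor Who Blink' and the ratio is 5 / 16 = 0.3125.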
                    else:
                        if pack: query = '%s %d' % (title, season)
                        else:
                            query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources

            querySplit = query.split()
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 0  # Pages start at 0
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (page, urllib.quote_plus(query))
                dataPhp = client.request(urlNew)
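                # The endpoint appears to return a PHP var_export()-style array dump wrapped in
                # <pre> tags, roughly: array ( 'hash' => array ( 'title' => '...', 'size' => 123, ), ).
                # The replacements below rewrite this into JSON ('array (' -> '{', ' => ' -> ' : ',
                # single to double quotes, trailing commas stripped) so json.loads() can parse it.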

                dataPhp = dataPhp.replace('<pre>', '').replace('</pre>', '')
                dataPhp = dataPhp[:-1] + '}'
                dataPhp = dataPhp.replace('array (', '{').replace('),', '},')
                dataPhp = dataPhp.replace('NULL,', 'null,').replace(
                    ' => ', ' : ').replace('\'', '"')

                # Remove trailing commas
                dataPhp = re.sub(',[ \t\r\n]+}', '}', dataPhp)
                dataPhp = re.sub(',[ \t\r\n]+\]', ']', dataPhp)

                page += 1
                added = False

                result = json.loads(dataPhp)
                for key, value in result.iteritems():
                    jsonName = value['title']
                    jsonSize = value['size']
                    jsonLink = value['magnet']
                    try:
                        jsonSeeds = int(value['seeders'])
                    except:
                        jsonSeeds = None

                    # Metadata
                    meta = metadata.Metadata(name=jsonName,
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=jsonLink,
                                             size=jsonSize,
                                             seeds=jsonSeeds)

                    # Ignore
                    meta.ignoreAdjust(contains=ignoreContains)
                    if meta.ignore(True): continue

                    # Ignore Name
                    # TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
                    if not all(q in jsonName for q in querySplit):
                        continue

                    # Add
                    sources.append({
                        'url': jsonLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': jsonName
                    })
                    added = True

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #16
    def sources(self, url, hostDict, hostprDict):
        self.tSources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            show = 'tvshowtitle' in data
            type = self.type_shows if 'tvshowtitle' in data else self.type_movies

            if 'exact' in data and data['exact']:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = None
                queries = [title]
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
                packExceptions = None
            else:
                title = data['tvshowtitle'] if show else data['title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None
                packExceptions = None
                if show:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        queries = [title]
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are matched as well.
                    else:
                        if pack:
                            queries = [
                                '%s S%02d' % (title, season),
                                '%s saison %d' % (title, season),
                                '%s intégrale' % title
                            ]
                            packExceptions = [
                                2
                            ]  # Indexes of queries for which season-pack file name detection should be skipped.
                        else:
                            queries = [
                                '%s S%02dE%02d' % (title, season, episode)
                            ]
                else:
                    queries = ['%s %d' % (title, year)]
                queries = [
                    re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
                    for query in queries
                ]

            if not self._query(queries): return self.tSources

            url = urlparse.urljoin(self.base_link, self.search_link)
            queries = [urllib.quote(query)
                       for query in queries]  # quote_plus does not work.

            timerTimeout = tools.Settings.getInteger(
                'scraping.providers.timeout')
            timerEnd = timerTimeout - 8
            timer = tools.Time(start=True)

            self.tThreadsSearches = []
            self.tThreadsLinks = []
            self.tLock = threading.Lock()
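            # Each query variant is searched in its own thread. _search (defined elsewhere in this
            # provider) is expected to append matches to self.tSources while holding self.tLock, and
            # may spawn per-link lookup threads into self.tThreadsLinks; both pools are awaited below.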

            for q in range(len(queries)):
                query = queries[q]
                packException = True if packExceptions and q in packExceptions else False
                thread = threading.Thread(target=self._search,
                                          args=(url, query, show, type, title,
                                                titles, year, season, episode,
                                                pack, packCount, packException,
                                                ignoreContains))
                self.tThreadsSearches.append(thread)
                thread.start()

            while True:
                if timer.elapsed() > timerTimeout: break
                if not any([t.is_alive() for t in self.tThreadsSearches]):
                    break
                tools.Time.sleep(0.5)

            # First filter out all non-related links before doing the hash lookup.
            timerTimeout -= 2
            while True:
                if timer.elapsed() > timerTimeout: break
                if not any([t.is_alive() for t in self.tThreadsLinks]): break
                tools.Time.sleep(0.5)

            try:
                self.tLock.release()
            except:
                pass

            return self.tSources
        except:
            tools.Logger.error()
            try:
                self.tLock.release()
            except:
                pass
            return self.tSources
Example #17
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            pack = None

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s saison %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = title  # Do not include year, otherwise there are few results.
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            type = self.type_shows if 'tvshowtitle' in data else self.type_movies

            url = urlparse.urljoin(self.base_link, self.search_link) % (
                type, urllib.quote_plus(query))
            html = BeautifulSoup(client.request(url))

            htmlTable = html.find_all(
                'table', class_='cust-table')[0].find_all('tbody',
                                                          recursive=False)[0]
            htmlRows = htmlTable.find_all('tr', recursive=False)

            self.tLock = threading.Lock()
            self.tLinks = [None] * len(htmlRows)
            threads = []
            for i in range(len(htmlRows)):
                urlTorrent = self.base_link + htmlRows[i].find_all(
                    'td', recursive=False)[0].find_all('a')[0]['href']
                threads.append(
                    threading.Thread(target=self._link, args=(urlTorrent, i)))

            [thread.start() for thread in threads]
            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)
            while timer.elapsed() < timerEnd and any(
                [thread.is_alive() for thread in threads]):
                tools.Time.sleep(0.5)

            self.tLock.acquire(
            )  # Just lock in case the threads are still running.

            for i in range(len(htmlRows)):
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                htmlRow = htmlRows[i]
                htmlColumns = htmlRow.find_all('td', recursive=False)

                # Name
                htmlName = htmlColumns[0].getText().strip()
                if not 'tvshowtitle' in data:
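                    # For movie names that end with the year, e.g. 'Avatar FRENCH 1080p 2009', the
                    # rewrite moves the year next to the title ('Avatar 2009 FRENCH 1080p') so
                    # metadata matching on '<title> <year>' still works.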
                    htmlName = re.sub(
                        r"^(.*?)(TRUE|TRUEFRENCH|FRENCH|VOSTFR|VO)(.*)([0-9]{4})$",
                        r"\1 \4 \2\3", htmlName)

                # Link
                htmlLink = self.tLinks[i]

                # Size
                htmlSize = htmlColumns[1].getText().strip().lower().replace(
                    ' mo', 'MB').replace(' go', 'GB').replace(' o', 'b')
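                # French size units: 'Mo' (megaoctets) and 'Go' (gigaoctets) map to MB and GB; a bare 'o' (octets) means bytes.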

                # Seeds
                try:
                    htmlSeeds = int(htmlColumns[2].getText().strip())
                except:
                    htmlSeeds = None

                # Metadata
                meta = metadata.Metadata(name=htmlName,
                                         title=title,
                                         year=year,
                                         season=season,
                                         episode=episode,
                                         pack=pack,
                                         packCount=packCount,
                                         link=htmlLink,
                                         size=htmlSize,
                                         seeds=htmlSeeds)

                # Ignore
                if meta.ignore(False):
                    continue

                # Add
                sources.append({
                    'url': htmlLink,
                    'debridonly': False,
                    'direct': False,
                    'source': 'torrent',
                    'language': self.language[0],
                    'quality': meta.videoQuality(),
                    'metadata': meta,
                    'file': htmlName
                })

            self.tLock.release()

            return sources
        except:
            tools.Logger.error()
            return sources
Example #18
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(
                data['year']
            ) if 'year' in data and not data['year'] == None else None
            season = int(
                data['season']
            ) if 'season' in data and not data['season'] == None else None
            episode = int(
                data['episode']
            ) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            page = 0  # Pages start at 0
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                urlNew = url % (urllib.quote_plus(query), page)
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('table', id='searchResult')[0]
                htmlRows = htmlTable.find_all(
                    'tr', recursive=False
                )  # Do not search further down the tree (just the direct children), because that will also retrieve the header row.

                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[1]

                    # Name
                    htmlName = htmlInfo.find_all('div',
                                                 class_='detName')[0].find_all(
                                                     'a')[0].getText().strip()

                    # Size
                    htmlSize = htmlInfo.find_all(
                        'font',
                        class_='detDesc')[0].getText().replace('&nbsp;', ' ')
                    indexStart = htmlSize.find(', Size')
                    indexEnd = htmlSize.find(', ', indexStart + 1)
                    htmlSize = htmlSize[indexStart + 7:indexEnd]
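                    # The detDesc text typically reads 'Uploaded 05-16 2009, Size 1.4 GiB, ULed by user';
                    # the slice between ', Size ' and the next ', ' extracts just '1.4 GiB'.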

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlInfo.find_all('a')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]['href']
                        if link.startswith('magnet:'):
                            htmlLink = link
                            break

                    # Seeds
                    htmlSeeds = int(htmlColumns[2].getText())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'info': meta.information(),
                        'file': htmlName
                    })
                    added = True

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #19
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
				pack = False
				packCount = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				pack = data['pack'] if 'pack' in data else False
				packCount = data['packcount'] if 'packcount' in data else None

				if 'tvshowtitle' in data:
					if pack: query = '%s %d' % (title, season)
					else: query = '%s S%02dE%02d' % (title, season, episode)
				else:
					query = '%s %d' % (title, year)
				query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

			query = urllib.quote_plus(query)
			category = self.category_tvshows if ('tvshowtitle' in data and not data['tvshowtitle'] == None and not data['tvshowtitle'] == '') else self.category_movies

			pageLimit = tools.Settings.getInteger('scraping.providers.pages')
			pageCounter = 0

			page = 1 # Pages start at 1
			added = False

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			while True:
				# Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
				if timer.elapsed() > timerEnd:
					break

				pageCounter += 1
				if pageLimit > 0 and pageCounter > pageLimit:
					break

				urlNew = (self.base_link + self.search_link) % (query, category, page)
				html = BeautifulSoup(client.request(urlNew))
				htmlTable = html.find_all('div', class_ = 'content')[0].find_all('table', class_ = 'table-sm', recursive = False)[1]
				htmlRows = htmlTable.find_all('tr', recursive = False)

				page += 1
				added = False

				for i in range(len(htmlRows)):
					htmlRow = htmlRows[i]
					htmlColumns = htmlRow.find_all('td', recursive = False)

					# Name
					htmlName = htmlColumns[0].getText().strip()

					# Size
					htmlSize = htmlColumns[1].getText().strip()

					# Link
					htmlLink = htmlRow.find_all('td', recursive = False)[0].find_all('a')[0]['href'].strip()
					htmlLink = re.search('\/torrent\/(.*)\/', htmlLink, re.IGNORECASE).group(1)
					htmlLink = (self.base_link + self.torrent_link) % htmlLink

					# Seeds
					htmlSeeds = int(htmlColumns[3].getText().strip())

					# Metadata
					meta = metadata.Metadata(name = htmlName, title = title, year = year, season = season, episode = episode, pack = pack, packCount = packCount, link = htmlLink, size = htmlSize, seeds = htmlSeeds)

					# Ignore
					if meta.ignore(True):
						continue

					# Add
					sources.append({'url' : htmlLink, 'debridonly' : False, 'direct' : False, 'source' : 'torrent', 'language' : self.language[0], 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : htmlName})
					added = True

				if not added: # Nothing valid was added on this page; assume the last page was reached.
					break

			return sources
		except:
			return sources
Example #20
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(
                data['year']
            ) if 'year' in data and not data['year'] == None else None
            season = int(
                data['season']
            ) if 'season' in data and not data['season'] == None else None
            episode = int(
                data['episode']
            ) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False
            packCount = data['packcount'] if 'packcount' in data else None

            category = self.category_show if 'tvshowtitle' in data else self.category_movie

            if 'tvshowtitle' in data:
                if pack: query = '%s %d' % (title, season)
                else: query = '%s S%02dE%02d' % (title, season, episode)
            else:
                query = '%s %d' % (title, year)
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
            querySplit = query.split()

            # Login
            if self.enabled and self.username and not self.username == '' and self.password and not self.password == '':
                login = self.base_link + self.login_link
                post = urllib.urlencode({
                    'username': self.username,
                    'password': self.password,
                    'submit': 'submit'
                })
                cookie = client.request(login,
                                        post=post,
                                        output='cookie',
                                        close=False)
                response = client.request(login,
                                          post=post,
                                          cookie=cookie,
                                          output='extended')
                headers = {
                    'User-Agent': response[3]['User-Agent'],
                    'Cookie': response[3]['Cookie']
                }
            else:
                cookie = None
                headers = None

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False
            firstLink = None

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query),
                                urllib.quote_plus(category), page)
                data = client.request(urlNew, cookie=cookie)
                data = tools.Converter.jsonFrom(data)['torrentList']

                page += 1
                added = False

                for i in data:
                    try:
                        # Id
                        jsonId = i['fid']

                        # File
                        jsonFile = i['filename']

                        # Name
                        try:
                            jsonName = i['name']
                        except:
                            jsonName = jsonFile

                        # Link
                        jsonLink = self.base_link + self.download_link
                        jsonLink = jsonLink % (jsonId, jsonFile)
                        if not headers == None:
                            jsonLink += '|' + urllib.urlencode(headers)
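                            # Kodi convention: URL-encoded request headers appended after '|' are
                            # sent along with the request when the link is resolved or downloaded.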

                        # Size
                        try:
                            jsonSize = i['size']
                        except:
                            jsonSize = None

                        # Seeds
                        try:
                            jsonSeeds = i['seeders']
                        except:
                            jsonSeeds = None

                        # Metadata
                        meta = metadata.Metadata(name=jsonName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=jsonLink,
                                                 size=jsonSize,
                                                 seeds=jsonSeeds)

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': jsonLink,
                            'debridonly': False,
                            'memberonly': True,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': jsonName
                        })
                        added = True
                    except:
                        tools.Logger.error()

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            tools.Logger.error()
            return sources
Example #21
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (eg: S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are matched as well.
                    else:
                        # Only this format works for season packs.
                        # Does not support individual episodes.
                        if pack:
                            query = '%s S%02d' % (title, season)
                        else:
                            pack = True
                            query = '%s сезон %d' % (title, season)
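                            # 'сезон' is Russian for 'season'. Individual episodes are not indexed,
                            # so episode requests fall back to the season pack, hence pack is forced
                            # to True above.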
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 0  # Pages start at 0
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), page)
                html = client.request(urlNew)

                # The site's HTML is missing a closing quote in the table's style attribute.
                # Fix it and add a custom 'gaia' class so the table is easy to locate below.
                html = html.replace(
                    'style="width:1095px; class=" lista">',
                    'style="width:1095px;" class="gaia lista">')

                htmlLower = html.lower()
                start = htmlLower.index('class="gaia')
                start = htmlLower.index('</tr>', start) + 5
                end = htmlLower.index('</table>', start) + 8
                html = html[start:end]
                html = html.replace('\n', '').replace('\r', '')
                html = html.replace('</TR>', '</tr>')
                htmlRows = html.split('</tr>')

                page += 1
                added = False

                for htmlRow in htmlRows:

                    # Link
                    try:
                        htmlLink = re.search('(magnet:.*?)>', htmlRow,
                                             re.IGNORECASE).group(1)
                    except:
                        continue

                    # Name
                    try:
                        htmlName = ' ' + re.search(
                            'details\.php.*?>(.*?)<', htmlRow,
                            re.IGNORECASE).group(1).strip()
                    except:
                        htmlName = ''

                    # Category
                    try:
                        htmlName += ' ' + re.search(
                            'border=0\s+alt="(.*?)"', htmlRow,
                            re.IGNORECASE).group(1).strip()
                    except:
                        pass

                    # Size
                    try:
                        htmlSize = re.search('>(\d+\.\d+ [gm]b)<', htmlRow,
                                             re.IGNORECASE).group(1).strip()
                    except:
                        htmlSize = None

                    # Seeds
                    try:
                        htmlSeeds = int(
                            re.search('>(\d+)<', htmlRow,
                                      re.IGNORECASE).group(1).strip())
                    except:
                        htmlSeeds = None

                    htmlName = re.sub('[^A-Za-z0-9\s]', ' ', htmlName)
                    htmlName = re.sub('\s\s+', ' ', htmlName).strip()

                    # Otherwise, if 3D appears multiple times in the name, the result will be ignored.
                    # Eg: 3D Avatar 3D 2009 1080p BluR 3D
                    try:
                        htmlIndex = htmlName.lower().index('3d')
                        htmlName = htmlName.replace('3D', '').replace('3d', '')
                        if htmlIndex >= 0: htmlName += '3D'
                    except:
                        pass

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    meta.ignoreAdjust(contains=ignoreContains)
                    if meta.ignore(True): continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName,
                        'pack': pack
                    })
                    added = True

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #22
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 0  # Pages start at 0
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            #while True:
            while page == 0:  # KickassTorrents currently cannot display pages other than the first when sorted by seeders, so only the first page is fetched.
                # Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query))
                html = client.request(urlNew)

                # KickassTorrents has major mistakes in its HTML. Manually cut out the broken parts to create parsable markup.
                indexStart = html.find('<',
                                       html.find('<!-- Start of Loop -->') + 1)
                indexEnd = html.rfind('<!-- End of Loop -->')
                html = html[indexStart:indexEnd]

                html = html.replace('<div class="markeredBlock',
                                    '</div><div class="markeredBlock'
                                    )  # torrentname div tag not closed.
                html = html.replace('</span></td>',
                                    '</td>')  # Dangling </span> closing tag.

                html = BeautifulSoup(html)

                page += 1
                added = False

                htmlRows = html.find_all(
                    'tr', recursive=False
                )  # Do not search further down the tree (just the direct children).
                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    if 'firstr' in htmlRow['class']:  # Header.
                        continue
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', class_='cellMainLink')[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[1].getText().replace('&nbsp;', ' ')

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlInfo.find_all('a', class_='icon16')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]
                        if link.has_attr('href'):
                            link = link['href']
                            if 'magnet' in link:
                                htmlLink = urllib.unquote(
                                    re.findall('(magnet.*)', link)[0]
                                )  # Starts with redirection url, eg: https://mylink.bz/?url=magnet...
                                break

                    # Seeds
                    htmlSeeds = int(htmlColumns[3].getText())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Nothing valid was added on this page; assume the last page was reached.
                    break

            return sources
        except:
            return sources
Example #23
	def sources(self, url, hostDict, hostprDict):
		sources = []
		try:
			if url == None:
				raise Exception()

			if not self.enabled:
				raise Exception()

			data = urlparse.parse_qs(url)
			data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

			if 'exact' in data and data['exact']:
				query = title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = None
				season = None
				episode = None
			else:
				title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
				year = int(data['year']) if 'year' in data and not data['year'] == None else None
				season = int(data['season']) if 'season' in data and not data['season'] == None else None
				episode = int(data['episode']) if 'episode' in data and not data['episode'] == None else None
				query = '%s S%02dE%02d' % (title, season, episode) if 'tvshowtitle' in data else '%s %d' % (title, year)

			query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
			if not self.streamQuality == None and not self.streamQuality == '' and not self.streamQuality == 'sd':
				query += ' %s' % self.streamQuality
			if not self.streamLanguage == None and not self.streamLanguage == '' and not self.streamLanguage == 'un':
				query += ' lang:%s' % self.streamLanguage
			query = urllib.quote_plus(query)

			hostDict = hostprDict + hostDict

			iterations = self.streamLimit / float(self.streamIncrease)
			if iterations < 1:
				last = self.streamLimit
				iterations = 1
			else:
				difference = iterations - math.floor(iterations)
				last = self.streamIncrease if difference == 0 else int(difference * self.streamIncrease)
				iterations = int(math.ceil(iterations))
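			# e.g. streamLimit = 25 with streamIncrease = 10 gives iterations = 3,
			# requesting 10, 10 and finally last = 5 results.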

			timerEnd = tools.Settings.getInteger('scraping.providers.timeout') - 8
			timer = tools.Time(start = True)

			# Track API key rotation separately so the result count computed above ('last') is not overwritten.
			apiLast = settings.Prontv.apiLast()
			api = settings.Prontv.apiNext()
			apiFirst = apiLast

			for type in self.types:
				for offset in range(iterations):
					# Stop searching 8 seconds before the provider timeout; otherwise the search might not complete in time and no links would be returned.
					if timer.elapsed() > timerEnd:
						break

					if len(sources) >= self.streamLimit:
						break

					searchCount = last if offset == iterations - 1 else self.streamIncrease
					searchFrom = (offset * self.streamIncrease) + 1

					results = self.retrieve(type, api, query, searchCount, searchFrom)

					try:
						while self.limit(results):
							apiLast = settings.Prontv.apiLast()
							if apiFirst == apiLast: break
							api = settings.Prontv.apiNext()
							results = self.retrieve(type, api, query, searchCount, searchFrom)

						if self.limit(results):
							interface.Dialog.notification(title = 35261, message = interface.Translation.string(33952) + ' (' + str(results['fetchedtoday']) + ' ' + interface.Translation.string(35222) + ')', icon = interface.Dialog.IconWarning)
							tools.Time.sleep(2)
							return sources
					except: pass

					results = results['result']
					added = False
					for result in results:
						# Information
						jsonName = result['title']
						jsonSize = result['sizeinternal']
						jsonExtension = result['extension']
						jsonLanguage = result['lang']
						jsonHoster = result['hostername'].lower()
						jsonLink = result['hosterurls'][0]['url']

						# Ignore Hosters
						if not jsonHoster in hostDict:
							continue

						# Ignore Non-Videos
						# Alluc often has other files, such as SRT, also listed as streams.
						if not jsonExtension == None and not jsonExtension == '' and not tools.Video.extensionValid(jsonExtension):
							continue

						# Metadata
						meta = metadata.Metadata(name = jsonName, title = title, year = year, season = season, episode = episode, link = jsonLink, size = jsonSize)

						# Ignore
						if meta.ignore(False):
							continue

						# Add
						sources.append({'url' : jsonLink, 'debridonly' : False, 'direct' : False, 'memberonly' : True, 'source' : jsonHoster, 'language' : jsonLanguage, 'quality':  meta.videoQuality(), 'metadata' : meta, 'file' : jsonName})
						added = True

					if not added:
						break

			return sources
		except:
			return sources
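The batching arithmetic above derives how many API calls are needed and how many results the final call should request. A standalone sketch of that calculation, assuming illustrative limit and increase values rather than the actual Prontv settings:

import math

def batches(stream_limit, stream_increase):
    # Number of API calls needed to cover stream_limit results when each call
    # requests stream_increase results, plus the size of the final (partial) call.
    iterations = stream_limit / float(stream_increase)
    if iterations < 1:
        return 1, stream_limit  # A single call covers everything.
    difference = iterations - math.floor(iterations)
    last = stream_increase if difference == 0 else int(difference * stream_increase)
    return int(math.ceil(iterations)), last

# Example: a limit of 250 with batches of 100 needs 3 calls; the last call fetches only 50.
print(batches(250, 100))  # (3, 50)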
Example #24
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            premiumize = debrid.Premiumize()

            if not premiumize.accountValid():
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            year = int(
                data['year']
            ) if 'year' in data and not data['year'] == None else None
            season = int(
                data['season']
            ) if 'season' in data and not data['season'] == None else None
            episode = int(
                data['episode']
            ) if 'episode' in data and not data['episode'] == None else None
            pack = data['pack'] if 'pack' in data else False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 3
            timer = tools.Time(start=True)

            threads = []
            ids = []
            items = premiumize._itemsTransfer()
            for item in items:
                if item['transfer']['progress']['completed'][
                        'value'] == 1:  # Only finished downloads.
                    id = item['id']
                    if not id in ids:
                        meta = metadata.Metadata(name=item['name'],
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack)
                        if not meta.ignore(size=False):
                            ids.append(id)
                            threads.append(
                                threading.Thread(target=self._item,
                                                 args=(id, season, episode)))

            [thread.start() for thread in threads]

            while True:
                if timer.elapsed() > timerEnd:
                    break
                if all([not thread.is_alive() for thread in threads]):
                    break
                time.sleep(0.5)

            try:
                self.mutex.acquire()
            except:
                pass
            items = self.items
            try:
                self.mutex.release()
            except:
                pass

            for item in items:
                jsonName = item['video']['name']
                jsonLink = item['video']['link']
                jsonSize = item['video']['size']['bytes']

                # RAR Files
                if jsonLink.lower().endswith('.rar'):
                    continue

                # Metadata
                meta = metadata.Metadata(name=jsonName,
                                         title=title,
                                         year=year,
                                         season=season,
                                         episode=episode,
                                         size=jsonSize)

                # Add
                sources.append({
                    'url': jsonLink,
                    'premium': True,
                    'debridonly': True,
                    'direct': True,
                    'memberonly': True,
                    'source': 'Premiumize',
                    'language': self.language[0],
                    'quality': meta.videoQuality(),
                    'info': meta.information(),
                    'file': jsonName
                })

            return sources
        except:
            return sources
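The Premiumize example starts one worker thread per finished transfer and then polls until the threads finish or the scraper deadline passes. A minimal sketch of that start-then-poll pattern using only the standard library; the function name and timings are illustrative, not part of the provider:

import threading
import time

def wait_for_threads(threads, deadline, poll=0.5):
    # Start all workers, then poll until every thread has finished or the deadline passes.
    for thread in threads:
        thread.start()
    started = time.time()
    while time.time() - started < deadline:
        if all(not thread.is_alive() for thread in threads):
            break
        time.sleep(poll)

# Usage (illustrative): give the per-item lookups at most 27 seconds in total.
# threads = [threading.Thread(target=lookup, args=(item,)) for item in items]
# wait_for_threads(threads, 27)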
Example #25
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            querySplit = query.split()
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 0  # Pages start at 0
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise the search might keep running, not complete in time, and therefore not return any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), (page * 20))
                data = client.request(urlNew)

                page += 1
                added = False

                result = json.loads(data)['results']
                for i in result:
                    jsonName = i['title']
                    jsonSize = i['size']
                    jsonLink = i['magnet']
                    try:
                        jsonSeeds = int(i['swarm']['seeders'])
                    except:
                        jsonSeeds = None

                    # Metadata
                    meta = metadata.Metadata(name=jsonName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=jsonLink,
                                             size=jsonSize,
                                             seeds=jsonSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Ignore Name
                    # TorrentProject has a lot of season packs, foreign titles, and other torrents that should be excluded. If the name does not contain the exact search string, ignore the result.
                    if not all(q in jsonName for q in querySplit):
                        continue

                    # Add
                    sources.append({
                        'url': jsonLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': jsonName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
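The name filter above keeps only results whose name contains every word of the search query, which is how the example weeds out foreign titles and unrelated packs. A small sketch of the same check; lower-casing both sides is my own assumption to make the comparison case-insensitive, which is slightly more lenient than the code above:

def contains_all_words(name, query):
    # True only if every word of the search query occurs in the release name.
    # Lower-casing both sides avoids rejecting results that only differ in case.
    name = name.lower()
    return all(word.lower() in name for word in query.split())

print(contains_all_words('V.for.Vendetta.2005.1080p.BluRay', 'V for Vendetta'))  # True
print(contains_all_words('Vendetta.2013.720p', 'V for Vendetta'))                # False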
Example #26
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if not tools.System.developers():
                raise Exception()

            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            category = self.category_tvshows if (
                'tvshowtitle' in data and not data['tvshowtitle'] == None
                and not data['tvshowtitle'] == '') else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1  # Pages start at 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise the search might keep running, not complete in time, and therefore not return any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), category, page)
                data = client.request(urlNew)

                # RarBg's HTML is not valid and a total mess, probably to make it hard for scrapers.
                # First try to parse the HTML. If it fails, extract only the table from the markup and construct new HTML.
                # Sometimes both fail, seems like RarBg randomizes the corruption in its HTML.
                htmlRows = []
                try:
                    html = BeautifulSoup(data)
                    htmlTable = html.find_all('table', class_='lista2t')[0]
                    htmlRows = htmlTable.find_all('tr',
                                                  class_='lista2',
                                                  recursive=False)
                    if len(htmlRows) == 0: raise Exception()
                except:
                    start = data.find('lista2t')
                    if start < 0: raise Exception()
                    start += 7
                    start = data.find('lista2', start)
                    start = data.find('>', start) + 1
                    end = data.find('<tr><td align="center" colspan="2">',
                                    start)
                    data = '<html><body><table class="lista2t"><tr class="lista2">' + data[
                        start:end] + '</table></body></html>'
                    html = BeautifulSoup(data)
                    htmlTable = html.find_all('table', class_='lista2t')[0]
                    htmlRows = htmlTable.find_all('tr',
                                                  class_='lista2',
                                                  recursive=False)

                page += 1
                added = False

                for i in range(len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[1]

                    # Name
                    htmlName = htmlInfo.find_all('a')[0].getText().strip()

                    # 3D
                    htmlImages = htmlInfo.find_all('img')
                    for j in range(len(htmlImages)):
                        try:
                            if htmlImages[j]['src'].endswith('3d.png'):
                                htmlName += ' 3D'
                                break
                        except:
                            pass

                    # Size
                    htmlSize = htmlColumns[3].getText().strip()

                    # Link
                    # TODO: If the hash cannot be retrieved from the mouse-over image, fallback to the .torrent file.
                    try:
                        htmlLink = htmlInfo.find_all('a')[0]['onmouseover']
                        start = htmlLink.find('/over/')
                        if start < 0:
                            raise Exception()
                        start += 6
                        end = htmlLink.find('.', start)
                        htmlLink = htmlLink[start:end]
                        if not len(htmlLink) == 40:
                            raise Exception()
                        htmlLink = self.magnet_link % (
                            htmlLink, htmlName.replace(' ', ''))
                    except:
                        try:
                            htmlLink = htmlInfo.find_all('a')[0]['href']
                            start = htmlLink.find('torrent/')
                            if start < 0:
                                raise Exception()
                            start += 8
                            htmlLink = htmlLink[start:]
                            if len(htmlLink) == 0:
                                raise Exception()
                            htmlLink = self.torrent_link % (
                                htmlLink, htmlName.replace(' ', ''))
                        except:
                            continue

                    # Seeds
                    htmlSeeds = int(htmlColumns[4].getText().strip())

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds)

                    # Ignore
                    if meta.ignore(True):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
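The primary link extraction above pulls a 40-character info hash out of the row's mouse-over preview URL (the hash sits between '/over/' and the next dot) and turns it into a magnet link, falling back to the torrent page when that fails. A compact sketch of the hash-to-magnet step; the magnet URI template here is the common convention, not necessarily the provider's own self.magnet_link format:

import re
import urllib

def magnet_from_mouseover(onmouseover, name):
    # The preview URL embeds the torrent's info hash between '/over/' and the next dot.
    match = re.search('/over/([a-fA-F0-9]{40})\\.', onmouseover)
    if not match:
        return None
    return 'magnet:?xt=urn:btih:%s&dn=%s' % (match.group(1), urllib.quote_plus(name))

print(magnet_from_mouseover('/over/0123456789abcdef0123456789abcdef01234567.jpg', 'Some Movie 2005'))
# magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&dn=Some+Movie+2005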
Example #27
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                type = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                type = 'tv' if 'tvshowtitle' in data else 'movie'
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise the search might keep running, not complete in time, and therefore not return any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), page)
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='table')[0]
                htmlRows = htmlTable.find_all('td', class_='x-item')
                for i in range(0, len(htmlRows)):
                    try:
                        htmlRow = htmlRows[i]

                        # Name
                        htmlName = htmlRow.find_all(
                            'a', class_='title')[0]['title'].strip()

                        # Size
                        htmlSize = htmlRow.find_all(
                            'div', class_='tail')[0].getText().replace(
                                '\n', '').replace('\r',
                                                  '').replace('&nbsp;',
                                                              ' ').strip()
                        htmlSize = re.search('.*[sS]ize:(.*)[dD]ownloads.*',
                                             htmlSize, re.IGNORECASE)
                        if htmlSize: htmlSize = htmlSize.group(1).strip()
                        else: htmlSize = None

                        # Link
                        htmlLink = htmlRow.find_all(
                            'div', class_='tail')[0].find_all(
                                'a', class_='title')[0]['href'].strip()

                        # Metadata
                        meta = metadata.Metadata(name=htmlName,
                                                 title=title,
                                                 year=year,
                                                 season=season,
                                                 episode=episode,
                                                 pack=pack,
                                                 packCount=packCount,
                                                 link=htmlLink,
                                                 size=htmlSize,
                                                 seeds=1)

                        # Ignore
                        if meta.ignore(True):
                            continue

                        # Add
                        sources.append({
                            'url': htmlLink,
                            'debridonly': False,
                            'direct': False,
                            'source': 'torrent',
                            'language': self.language[0],
                            'quality': meta.videoQuality(),
                            'metadata': meta,
                            'file': htmlName
                        })
                        added = True
                    except:
                        pass

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
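The size above is buried in a free-text 'tail' block and recovered with a regular expression anchored between the 'Size:' and 'Downloads' labels. A standalone sketch of that extraction; the sample string is invented:

import re

def extract_size(tail_text):
    # Normalise whitespace, then take whatever sits between the 'Size:' and 'Downloads' labels.
    tail_text = ' '.join(tail_text.split())
    match = re.search('.*[sS]ize:(.*)[dD]ownloads.*', tail_text, re.IGNORECASE)
    return match.group(1).strip() if match else None

print(extract_size('Age: 2 days Size: 1.4 GB Downloads: 57'))  # 1.4 GB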
Example #28
    def sources(self, url, hostDict, hostprDict):
        sources = []
        found = []
        try:
            if url == None:
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    if pack: query = '%s %d' % (title, season)
                    else: query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise the search might keep running, not complete in time, and therefore not return any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (urllib.quote_plus(query), page)
                html = BeautifulSoup(client.request(urlNew))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='list')[0]
                htmlRows = htmlTable.find_all('tr', recursive=False)

                for i in range(1, len(htmlRows)):
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all(
                        'td', recursive=False)  # Use children and no further.

                    # Name
                    htmlName = htmlColumns[0].find_all('a')[0].getText()

                    # Size
                    htmlSize = htmlColumns[3].getText()

                    # Link
                    htmlLink = htmlColumns[0].find_all('a')[0]['href']

                    # Age
                    htmlAge = htmlColumns[1].getText().lower()
                    if 'day' in htmlAge:
                        htmlAge = int(
                            htmlAge.replace('days', '').replace('day',
                                                                '').strip())
                    elif 'week' in htmlAge:
                        htmlAge = int(
                            htmlAge.replace('weeks', '').replace(
                                'week', '').strip()) * 7
                    elif 'month' in htmlAge:
                        htmlAge = int(
                            htmlAge.replace('months', '').replace(
                                'month', '').strip()) * 30
                    elif 'year' in htmlAge:
                        htmlAge = int(
                            htmlAge.replace('years', '').replace(
                                'year', '').strip()) * 365
                    else:
                        htmlAge = 0

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             age=htmlAge)
                    meta.mIgnoreLength *= 10  # Otherwise too restrictive for very long usenet titles.

                    # Ignore
                    if meta.ignore(False):
                        continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'usenet',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working result.
                    break

            return sources
        except:
            return sources
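The age column is a human-readable string such as '3 weeks' or '2 months', which the example converts to an approximate day count before filtering. A condensed sketch of that conversion, using the same multipliers as above (month and year lengths are approximations):

def age_in_days(text):
    # Map a human-readable age such as '3 weeks' onto an approximate number of days.
    text = text.lower()
    for unit, days in (('year', 365), ('month', 30), ('week', 7), ('day', 1)):
        if unit in text:
            return int(text.replace(unit + 's', '').replace(unit, '').strip()) * days
    return 0

print(age_in_days('3 weeks'))   # 21
print(age_in_days('2 months'))  # 60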
Example #29
    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if url == None: raise Exception()

            ignoreContains = None
            data = self._decode(url)

            if 'exact' in data and data['exact']:
                query = title = data[
                    'tvshowtitle'] if 'tvshowtitle' in data else data['title']
                titles = None
                year = None
                season = None
                episode = None
                pack = False
                packCount = None
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                titles = data[
                    'alternatives'] if 'alternatives' in data else None
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else None

                if 'tvshowtitle' in data:
                    # Search special episodes by name. All special episodes are added to season 0 by Trakt and TVDb. Hence, do not search by filename (e.g. S02E00), since the season is not known.
                    if (season == 0
                            or episode == 0) and ('title' in data
                                                  and not data['title'] == None
                                                  and not data['title'] == ''):
                        title = '%s %s' % (
                            data['tvshowtitle'], data['title']
                        )  # Change the title for metadata filtering.
                        query = title
                        ignoreContains = len(data['title']) / float(
                            len(title)
                        )  # Increase the required ignore ratio, since otherwise individual episodes and season packs are found as well.
                    else:
                        if pack: query = '%s %d' % (title, season)
                        else:
                            query = '%s S%02dE%02d' % (title, season, episode)
                else:
                    query = '%s %d' % (title, year)
                query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            if not self._query(query): return sources

            category = self.category_shows if 'tvshowtitle' in data else self.category_movies
            url = urlparse.urljoin(self.base_link, self.search_link)

            pageLimit = tools.Settings.getInteger('scraping.providers.pages')
            pageCounter = 0

            page = 1
            added = False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 8
            timer = tools.Time(start=True)

            while True:
                # Stop searching 8 seconds before the provider timeout, otherwise the search might keep running, not complete in time, and therefore not return any links.
                if timer.elapsed() > timerEnd:
                    break

                pageCounter += 1
                if pageLimit > 0 and pageCounter > pageLimit:
                    break

                urlNew = url % (page, urllib.quote_plus(query), category)

                # For some reason Zooqle returns 404 even though the response has a body.
                # This is probably a bug on Zooqle's server and the error should just be ignored.
                html = BeautifulSoup(client.request(urlNew, ignoreErrors=404))

                page += 1
                added = False

                htmlTable = html.find_all('table', class_='table-torrents')[0]
                htmlRows = htmlTable.find_all('tr', recursive=False)
                for i in range(1, len(htmlRows)):  # First row is header.
                    htmlRow = htmlRows[i]
                    htmlColumns = htmlRow.find_all('td')
                    htmlInfo = htmlColumns[1]
                    htmlMeta = htmlInfo.find_all('div', recursive=False)[0]

                    # Name
                    htmlName = htmlInfo.find_all(
                        'a', recursive=False)[0].getText().strip()

                    # Size
                    htmlSize = htmlColumns[3].getText()

                    # Link
                    htmlLink = ''
                    htmlLinks = htmlColumns[2].find_all('a')
                    for j in range(len(htmlLinks)):
                        link = htmlLinks[j]['href']
                        if link.startswith('magnet:'):
                            htmlLink = link
                            break

                    # Seeds
                    htmlSeeds = htmlColumns[5].find_all(
                        'div', recursive=False)[0]['title']
                    indexStart = htmlSeeds.find(':')
                    if indexStart > 0:
                        indexStart += 1
                        indexEnd = htmlSeeds.find('|', indexStart)
                        if indexEnd > 0:
                            htmlSeeds = htmlSeeds[indexStart:indexEnd]
                        else:
                            htmlSeeds = htmlSeeds[indexStart:]
                        htmlSeeds = int(
                            htmlSeeds.replace(',', '').replace('.',
                                                               '').strip())
                    else:
                        htmlSeeds = None

                    # Quality & 3D
                    try:
                        htmlQuality = htmlMeta.find_all(
                            'span',
                            class_='hidden-xs')[0].getText().lower().strip()
                        if 'ultra' in htmlQuality: htmlQuality = '4K'
                        elif 'std' in htmlQuality: htmlQuality = 'SD'
                        elif 'med' in htmlQuality or 'low' in htmlQuality:
                            htmlQuality = 'CAM'
                        htmlName += ' ' + htmlQuality
                    except:
                        pass

                    # Audio
                    try:
                        htmlName += ' ' + htmlMeta.find_all(
                            'span', {'title': 'Audio format'})[0].getText()
                    except:
                        pass

                    # Languages
                    try:
                        htmlLanguages = htmlMeta.find_all(
                            'span', {'title': 'Detected languages'
                                     })[0].getText().split(',')
                    except:
                        htmlLanguages = None

                    # Metadata
                    meta = metadata.Metadata(name=htmlName,
                                             title=title,
                                             titles=titles,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             pack=pack,
                                             packCount=packCount,
                                             link=htmlLink,
                                             size=htmlSize,
                                             seeds=htmlSeeds,
                                             languageAudio=htmlLanguages)

                    # Ignore
                    meta.ignoreAdjust(contains=ignoreContains)
                    if meta.ignore(True): continue

                    # Add
                    sources.append({
                        'url': htmlLink,
                        'debridonly': False,
                        'direct': False,
                        'source': 'torrent',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': htmlName
                    })
                    added = True

                if not added:  # Last page reached with a working torrent
                    break

            return sources
        except:
            return sources
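The seed count above is read from a tooltip that, judging by the slicing logic, looks like 'Seeders: 1,234 | Leechers: 56'; the example takes the text between the colon and the pipe and strips the thousands separators. A direct sketch of that parsing; the tooltip format is inferred, not documented by Zooqle:

def seeds_from_tooltip(tooltip):
    # Take the value between the first ':' and the following '|', if there is one.
    start = tooltip.find(':')
    if start < 0:
        return None
    end = tooltip.find('|', start + 1)
    value = tooltip[start + 1:end] if end > 0 else tooltip[start + 1:]
    return int(value.replace(',', '').replace('.', '').strip())

print(seeds_from_tooltip('Seeders: 1,234 | Leechers: 56'))  # 1234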
Example #30
    def sources(self, url, hostDict, hostprDict):
        self.items = []  # NB: The same provider object is used for both normal episodes and season packs. Make sure the list is cleared from the previous run.
        sources = []
        try:
            if url == None:
                raise Exception()

            if not debrid.OffCloud().accountValid():
                raise Exception()

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            if 'exact' in data and data['exact']:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = None
                season = None
                episode = None
                pack = False
                packCount = 0
            else:
                title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                    'title']
                year = int(
                    data['year']
                ) if 'year' in data and not data['year'] == None else None
                season = int(
                    data['season']
                ) if 'season' in data and not data['season'] == None else None
                episode = int(
                    data['episode']) if 'episode' in data and not data[
                        'episode'] == None else None
                pack = data['pack'] if 'pack' in data else False
                packCount = data['packcount'] if 'packcount' in data else False

            timerEnd = tools.Settings.getInteger(
                'scraping.providers.timeout') - 3
            timer = tools.Time(start=True)

            threads = []
            self.ids = []
            threads.append(
                threading.Thread(target=self._items,
                                 args=(debrid.OffCloud.CategoryCloud, title,
                                       year, season, episode, pack)))
            threads.append(
                threading.Thread(target=self._items,
                                 args=(debrid.OffCloud.CategoryInstant, title,
                                       year, season, episode, pack)))
            [thread.start() for thread in threads]

            while True:
                if timer.elapsed() > timerEnd:
                    break
                if all([not thread.is_alive() for thread in threads]):
                    break
                time.sleep(0.5)

            try:
                self.mutex.acquire()
            except:
                pass
            items = self.items
            try:
                self.mutex.release()
            except:
                pass

            for item in items:
                try:
                    jsonName = item['video']['name']
                    try:
                        if not item['name'] == jsonName:
                            jsonName = item[
                                'name'] + ' - ' + jsonName  # Sometimes metadata, like quality, is only in the folder name, not the file name.
                    except:
                        pass

                    jsonLink = item['video']['link']
                    try:
                        jsonSize = item['size']['bytes']
                    except:
                        jsonSize = None

                    # Metadata
                    meta = metadata.Metadata(name=jsonName,
                                             title=title,
                                             year=year,
                                             season=season,
                                             episode=episode,
                                             size=jsonSize,
                                             pack=pack,
                                             packCount=packCount)

                    # Add
                    sources.append({
                        'url': jsonLink,
                        'premium': True,
                        'debridonly': True,
                        'direct': True,
                        'memberonly': True,
                        'source': 'OffCloud',
                        'language': self.language[0],
                        'quality': meta.videoQuality(),
                        'metadata': meta,
                        'file': jsonName
                    })
                except:
                    pass
            return sources
        except:
            return sources
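Both lookup threads append their results to a shared self.items list, so the example wraps every access in mutex acquire/release calls guarded by try/except. A stripped-down sketch of the same shared-list pattern; the class and method names are illustrative, not part of the provider:

import threading

class SharedResults(object):
    def __init__(self):
        self.mutex = threading.Lock()
        self.items = []

    def add(self, item):
        # Writers take the lock so concurrent appends cannot interleave with readers.
        with self.mutex:
            self.items.append(item)

    def snapshot(self):
        # Readers copy the list under the lock and iterate over the copy outside of it.
        with self.mutex:
            return list(self.items)

# Usage (illustrative): worker threads call shared.add(result); the main thread
# reads shared.snapshot() once the wait loop has finished.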