Example 1
	def create(product_id, props, sale, brand):
		product = None
		product_key_name = Product.calc_key_name(product_id)
		if Product.get_by_key_name(product_key_name) is None:
			product = Product(key_name=product_key_name, id=product_id, name=props['name'],
								sale=sale, ship_min=parse(props.get('min_ship_date')), 
								ship_max=parse(props.get('max_ship_date')), brand=brand,
								description=helper.remove_html_tags(props.get('description')),
								status=props.get('status'),
								fit=props.get('fit'), categories=props['categories'],
								origin=props.get('origin'),
								return_policy_id=props.get('return_policy_id'))
			material = props.get('material')
			if material:
				try:
					product.materials = [str(m).strip().lower() for m in
						unidecode(material).split(',')]
				except Exception:
					product.materials = material
			color = props.get('color')
			if color:
				product.colors = [str(c).strip().lower() for c in unidecode(color).split('/')]

			product.put()
			
		return product
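
A detail worth noting across these examples: unidecode() folds accented characters to plain ASCII before the value is split, lowercased, or embedded in a URL. A minimal sketch of the normalisation used above for the material field, with a made-up input string instead of the props dict:

    from unidecode import unidecode

    material = u'Coton bio / Polyester recycl\u00e9, Laine m\u00e9rinos'
    # ASCII-fold, then split the comma-separated list and normalise case
    materials = [m.strip().lower() for m in unidecode(material).split(',')]
    # -> ['coton bio / polyester recycle', 'laine merinos']
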
Example 2
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v))
                  for (k, v) in {'info': 'view', 'get': 'download', 'title': 'view\s+torrent\s+'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                # fetch 15 results by default, and up to 100 if allowed in user profile
                search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'filter_cat[%s]=1'))

                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', attrs={'class': 'torrent_table'})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -4)]]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('a', title=rc['info'])
                                title = 'title' in info.attrs and rc['title'].sub('', info.attrs['title']) \
                                        or info.get_text().strip()

                                link = str(tr.find('a', title=rc['get'])['href']).replace('&amp;', '&').lstrip('/')
                                download_url = self.urls['get'] % link
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
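
This and most of the later examples repeat the Python 2 one-liner "isinstance(search_string, unicode) and unidecode(search_string) or search_string": unicode search terms are transliterated to ASCII before being interpolated into the provider URL, while byte strings pass through unchanged. A small sketch of the same behaviour written as a plain function (the name to_ascii is illustrative, not from the providers):

    from unidecode import unidecode

    def to_ascii(search_string):
        # Python 2: transliterate unicode input, leave byte strings as-is
        if isinstance(search_string, unicode):
            return unidecode(search_string)
        return search_string

    # to_ascii(u'Las chicas del cable Espa\u00f1a') -> 'Las chicas del cable Espana'
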
Example 3
    def _doSearch(self, search_params, epcount=0, age=0):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if mode != 'RSS':
                    searchURL = self.proxy._buildURL(self.searchurl % (urllib.quote(unidecode(search_string))))
                else:
                    searchURL = self.proxy._buildURL(self.url + 'tv/latest/')

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                re_title_url = self.proxy._buildRE(self.re_title_url)

                #Extracting torrent information from data returned by searchURL                   
                match = re.compile(re_title_url, re.DOTALL).finditer(urllib.unquote(data))
                for torrent in match:

                    title = torrent.group('title').replace('_', '.')  # not sure why, but SickBeard skips releases with '_' in the name
                    url = torrent.group('url')
                    id = int(torrent.group('id'))
                    seeders = int(torrent.group('seeders'))
                    leechers = int(torrent.group('leechers'))

                    #Filter unseeded torrent
                    if mode != 'RSS' and seeders == 0:
                        continue

                    # Accept Torrent only from Good People for every Episode Search
                    if self.confirmed and re.search('(VIP|Trusted|Helper)', torrent.group(0)) is None:
                        logger.log(u"ThePirateBay Provider found result " + torrent.group(
                            'title') + " but that doesn't seem like a trusted result so I'm ignoring it", logger.DEBUG)
                        continue

                    #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent 
                    if mode == 'Season':
                        ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
                        title = self._find_season_quality(title, id, ep_number)

                    if not title or not url:
                        continue

                    item = title, url, id, seeders, leechers

                    items[mode].append(item)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]

        return results
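
Here the transliterated search string is additionally percent-encoded with urllib.quote before being placed in the search URL; in Python 2, quote expects ASCII-safe input, which is why unidecode runs first. A short sketch of that step with a hypothetical search term:

    import urllib
    from unidecode import unidecode

    search_string = u'Una mamma per amica \u2013 stagione 4'
    # transliterate to ASCII (the en dash becomes '-'), then percent-encode
    query = urllib.quote(unidecode(search_string))
    # -> 'Una%20mamma%20per%20amica%20-%20stagione%204'
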
Example 4
    def _search_provider(self, search_params, **kwargs):

        self._authorised()
        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        url = self.urls['browse'] % self.passkey
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = url + (self.urls['search'] % search_string, '')['Cache' == mode]

                xml_data = self.cache.get_rss(search_url)

                cnt = len(items[mode])
                if xml_data and 'entries' in xml_data:
                    for entry in xml_data['entries']:
                        try:
                            if entry['title'] and 'download' in entry['link']:
                                items[mode].append((entry['title'], entry['link'], None, None))
                        except KeyError:
                            continue

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = list(set(results + items[mode]))

        return results
Example 5
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % search_string

                data_json = self.get_url(search_url, headers=dict(Authorization='Bearer %s' % self._token), json=True)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                if data_json:
                    for tr in data_json.get('releases'):
                        seeders, leechers, size = (tryInt(n, n) for n in [
                            tr.get(x) for x in ('seeders', 'leechers', 'size')])
                        if not self._reject_item(seeders, leechers):
                            title, download_url = tr.get('releaseName'), self._link(tr.get('shortId'))
                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example 6
    def _search_provider(self, search_params, **kwargs):

        results = []
        if self.show and not self.show.is_anime:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': r'[\.\s]+'}.items())

        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                search_url = self.urls['browse'] if 'Cache' == mode else \
                    self.urls['search'] % (rc['nodots'].sub(' ', search_string), str(time.time()).replace('.', '3'))

                data, html = 2 * [None]
                if 'Cache' == mode:
                    data = self.cache.get_rss(search_url)
                else:
                    html = self.get_url(search_url)

                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if None is not data:
                        for cur_item in data.get('entries', []):
                            title, download_url = cur_item.get('title'), self._link(cur_item.get('link'))
                            if title and download_url:
                                items[mode].append((title, download_url, '', ''))
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser('<html><body>%s</body></html>' % html, features=['html5lib', 'permissive']) as soup:
                        for link in soup.find_all('a'):
                            try:
                                variants = map(lambda t: t.get_text().replace('SD', '480p'),
                                               link.find_all('span', class_='badge'))
                                map(lambda t: t.decompose(), link.find_all('span') + link.find_all('div'))
                                title = '[HorribleSubs] ' + re.sub(r'\s*\[HorribleSubs\]\s*', '', link.get_text())
                                download_url = self._link(link.get('href'))
                                if title and download_url:
                                    items[mode] += map(lambda v: (
                                        '%s [%s]' % (title, v), '%s-%s' % (download_url, v), '', ''), variants)
                            except (AttributeError, TypeError, ValueError):
                                continue

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example 7
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                search_url = self.urls['search'] % (self._categories_string(mode, '%s', ';'), search_string,
                                                    ('', ';free')[self.freeleech], (';o=seeders', '')['Cache' == mode])

                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', attrs={'class': 'torrents'})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers = [int(tr.find('td', attrs={'class': x}).get_text().strip())
                                                     for x in ('t_seeders', 't_leechers')]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = ('title' in info.attrs and info['title']) or info.get_text().strip()
                                size = tr.find_all('td')[-4].get_text().strip()

                                download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
Example 8
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download', 'fl': '\[\W*F\W?L\W*\]'
                                                             }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (search_string, self._categories_string())

                html = self.get_url(search_url, timeout=90)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive'], attr='border="1"') as soup:
                        torrent_table = soup.find('table', attrs={'border': '1'})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                info = tr.find('a', href=rc['info'])
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -4)]]
                                if self.freeleech and (len(info.contents) < 2 or not rc['fl'].search(info.contents[1].string.strip())) \
                                        or self._peers_fail(mode, seeders, leechers):
                                    continue

                                title = 'title' in info.attrs and info.attrs['title'] or info.contents[0]
                                title = (isinstance(title, list) and title[0] or title).strip()
                                download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
Example 9
    def html(self, mode, search_string, results):

        if 'Content-Type' in self.session.headers:
            del (self.session.headers['Content-Type'])
        setattr(self.session, 'reserved', {'headers': {
            'Accept': 'text/html, application/xhtml+xml, */*', 'Accept-Language': 'en-GB',
            'Cache-Control': 'no-cache', 'Referer': 'https://broadcasthe.net/login.php', 'User-Agent': self.ua}})
        self.headers = None

        if self.auth_html or self._authorised_html():
            del (self.session.reserved['headers']['Referer'])
            if 'Referer' in self.session.headers:
                del (self.session.headers['Referer'])
            self.auth_html = True

            search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
            search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'filter_cat[%s]=1'))

            html = self.get_url(search_url, use_tmr_limit=False)
            if self.should_skip(log_warning=False, use_tmr_limit=False):
                return results

            cnt = len(results)
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                    torrent_table = soup.find(id='torrent_table')
                    torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
                        'cats': '(?i)cat\[(?:%s)\]' % self._categories_string(mode, template='', delimiter='|'),
                        'get': 'download'}.items())

                    head = None
                    for tr in torrent_rows[1:]:
                        cells = tr.find_all('td')
                        if 5 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers, size = [tryInt(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                            if not tr.find('a', href=rc['cats']) or self._reject_item(
                                    seeders, leechers, container=self.reject_m2ts and (
                                            re.search(r'(?i)\[.*?m2?ts.*?\]', tr.get_text('', strip=True)))):
                                continue

                            title = tr.select('td span[title]')[0].attrs.get('title').strip()
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (AttributeError, TypeError, ValueError, KeyError, IndexError):
                            continue

                        if title and download_url:
                            results.append((title, download_url, seeders, self._bytesizer(size)))
Example 10
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        for mode in search_params.keys():
            rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
                'info': 'detail', 'get': '.*id=(\d+).*', 'fl': '\[freeleech\]',
                'cats': 'cat=(?:%s)' % self._categories_string(mode=mode, template='', delimiter='|')}.items())

            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                search_url = self.urls['search'] % (self._categories_string(),
                                                    '+'.join(search_string.replace('.', ' ').split()),
                                                    ('', '&freeleech=on')[self.freeleech])
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', attrs={'cellpadding': 5})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if None is tr.find('a', href=rc['cats']) or self._reject_item(
                                        seeders, leechers,
                                        self.freeleech and (None is rc['fl'].search(cells[1].get_text()))):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or info.get_text()).strip()
                                download_url = self._link('%s/%s' % (
                                    re.sub(rc['get'], r'\1', str(info.attrs['href'])), str(title).replace(' ', '.')))
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 11
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download',
                                                             'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|')
                                                             }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (self._categories_string(), search_string)

                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', id='torrentsTable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    tr.find_all('td')[x].get_text().strip() for x in (-2, -1, -3)]]
                                if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip()
                                download_url = self.urls['get'] % tr.find('a', href=rc['get']).get('href')
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
Example 12
    def _doSearch(self, search_params):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if mode == 'RSS':
                    searchURL = self.url + 'index.php?page=torrents&active=1&category=%s' %(';'.join(self.categories[mode]))
                    logger.log(u"PublicHD cache update URL: "+ searchURL, logger.DEBUG)
                else:
                    searchURL = self.searchurl %(urllib.quote(unidecode(search_string)), ';'.join(self.categories[mode]))
                    logger.log(u"Search string: " + searchURL, logger.DEBUG)

                html = self.getURL(searchURL)
                if not html:
                    continue

                try:
                    soup = BeautifulSoup(html, features=["html5lib", "permissive"])

                    torrent_table = soup.find('table', attrs = {'id' : 'torrbg'})
                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                    #Continue only if one Release is found
                    if len(torrent_rows) < 2:
                        logger.log(u"The data returned from " + self.name + " does not contain any torrents", logger.DEBUG)
                        continue

                    for tr in torrent_rows[1:]:

                        try:
                            link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
                            title = tr.find(lambda x: x.has_attr('title')).text.replace('_','.')
                            url = tr.find(href=re.compile('magnet+'))['href']
                            seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
                            leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and seeders == 0:
                            continue

                        if not title or not url:
                            continue

                        item = title, url, link, seeders, leechers

                        items[mode].append(item)

                except Exception, e:
                    logger.log(u"Failed to parsing " + self.name + " Traceback: "  + traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
Example 13
    def _do_search(self, search_params, search_mode='eponly', epcount=0, age=0):

        results = []
        if not self._do_login():
            return results

        items = {'Season': [], 'Episode': [], 'Cache': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                search_url = self.urls['search'] % (search_string, self.categories)
                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', attrs={'border': '1'})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers = [int(tr.find_all('td')[x].get_text().strip()) for x in (-2, -1)]
                                if 'Cache' != mode and (seeders < self.minseed or leechers < self.minleech):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip()

                                download_url = self.urls['get'] % str(tr.find('a', href=rc['get'])['href']).lstrip('/')
                            except (AttributeError, TypeError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_result(mode, len(items[mode]) - cnt, search_url)

            # For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[2], reverse=True)

            results += items[mode]

        return results
Example 14
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['browse'] % (self._categories_string(), ('3', '0')[not self.freeleech],
                                                    (self.urls['search'] % search_string, '')['Cache' == mode])

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = html.replace('<?xml version="1.0" encoding="iso-8859-1"?>', '')
                    html = re.sub(r'(</td>)[^<]*</td>', r'\1', html)
                    html = re.sub(r'(<a[^<]*)<a[^<]*?href=details[^<]*', r'\1', html)
                    with BS4Parser(html, 'html.parser') as soup:
                        shows_found = False
                        torrent_rows = soup.find_all('tr')
                        for index, row in enumerate(torrent_rows):
                            if 'type' == row.find_all('td')[0].get_text().strip().lower():
                                shows_found = index
                                break

                        if not shows_found or 2 > (len(torrent_rows) - shows_found):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1 + shows_found:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(torrent_rows[shows_found])
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or info.get_text()).strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 15
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download',
                                                             'nuked': 'nuke', 'filter': 'free'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (search_string, self._categories_string(mode, '%s', ','))

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive'], attr='cellpadding="5"') as soup:
                        torrent_table = soup.find('table', class_='browse')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                info = tr.find('a', href=rc['info'])
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [n for n in [
                                    cells[head[x]].get_text().strip() for x in 'leech', 'leech', 'size']]
                                seeders, leechers, size = [tryInt(n, n) for n in
                                                           list(re.findall('^(\d+)[^\d]+?(\d+)', leechers)[0])
                                                           + re.findall('^[^\n\t]+', size)]
                                if self._reject_item(seeders, leechers,
                                                     self.freeleech and (not tr.find('a', class_=rc['filter'])),
                                                     self.confirmed and (any([tr.find('img', alt=rc['nuked']),
                                                                              tr.find('img', class_=rc['nuked'])]))):
                                    continue

                                title = (info.attrs.get('title') or info.get_text()).strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 16
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail', 'get': 'download', 'fl': '\(Freeleech\)'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (search_string, self._categories_string(mode),
                                                    ('3', '0')[not self.freeleech])

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = html.replace('<table width=100% border=0 align=center cellpadding=0 cellspacing=0>', '')
                    html = re.sub(r'(?s)(.*)(<table[^>]*?950[^>]*>.*)(</body>)', r'\1\3', html)
                    html = re.sub(r'(?s)<table[^>]+font[^>]+>', '<table id="parse">', html)
                    html = re.sub(r'(?s)(<td[^>]+>(?!<[ab]).*?)(?:(?:</[ab]>)+)', r'\1', html)
                    html = re.sub(r'(?m)^</td></tr></table>', r'', html)
                    with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="parse"') as soup:
                        torrent_table = soup.find('table', id='parse')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or info.get_text().split()[0]).strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 17
    def _doSearch(self, search_params, show=None):

        results = []
        items = {"Season": [], "Episode": []}

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                searchURL = self.urls["search"] % (unidecode(search_string), self.categories)

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    html = BeautifulSoup(data, features=["html5lib", "permissive"])

                    torrent_table = html.find("table", attrs={"id": "torrenttable"})
                    torrent_rows = torrent_table.find_all("tr") if torrent_table else []

                    if not torrent_rows:
                        #                        logger.log(u"No results found for: " + search_string + "(" + searchURL + ")", logger.DEBUG)
                        continue

                    for result in torrent_table.find_all("tr")[1:]:

                        link = result.find("td", attrs={"class": "name"}).find("a")
                        url = result.find("td", attrs={"class": "quickdownload"}).find("a")

                        title = link.string
                        download_url = self.urls["download"] % url["href"]
                        id = int(link["href"].replace("/torrent/", ""))
                        seeders = int(result.find("td", attrs={"class": "seeders"}).string)
                        leechers = int(result.find("td", attrs={"class": "leechers"}).string)

                        # Filter unseeded torrent
                        if seeders == 0 or not title or not download_url:
                            continue

                        item = title, download_url, id, seeders, leechers
                        logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)

                        items[mode].append(item)

                except Exception, e:
                    logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

            # For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
Example 18
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v))
                  for (k, v) in {'info': 'view', 'get': 'download', 'name': 'showname', 'nuked': 'nuked'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % search_string

                # fetches 15 results by default, and up to 100 if allowed in user profile
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', class_='torrent_table')
                        torrent_rows = []
                        if torrent_table:
                            torrent_rows = torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells) or tr.find('img', alt=rc['nuked']):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                title = tr.find('a', title=rc['info']).get_text().strip()
                                if title.lower().startswith('season '):
                                    title = '%s %s' % (tr.find('div', class_=rc['name']).get_text().strip(), title)

                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 19
    def _search_provider(self, search_params, **kwargs):

        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': '(^(info|torrent)/|/[\w+]{40,}\s*$)', 'get': '^magnet:'}.items())

        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                if 'Cache' == mode:
                    search_url = self.urls['search'] % tuple(search_string.split(','))
                else:
                    search_url = self.urls['search'] % (search_string.replace('.', ' '), '')
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', attrs={'class': ['table', 'is-striped']})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.select(
                                    '[alt*="magnet"], [title*="magnet"], [alt*="torrent"], [title*="torrent"]')[0] \
                                    or tr.find('a', href=rc['info'])
                                title = re.sub('\s(using|use|magnet|link)', '', (
                                        info.attrs.get('title') or info.attrs.get('alt') or info.get_text())).strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 20
    def logged_in(self, y):
        if all([None is y or 'logout' in y,
                bool(filter(lambda c: 'remember_web_' in c, self.session.cookies.keys()))]):
            if None is not y:
                self.shows = dict(re.findall('<option value="(\d+)">(.*?)</option>', y))
                h = HTMLParser()
                for k, v in self.shows.items():
                    self.shows[k] = sanitizeSceneName(h.unescape(unidecode(v.decode('utf-8'))))
            return True
        return False
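
This example chains several normalisation steps on the scraped option values: the UTF-8 byte string is decoded to unicode, ASCII-folded with unidecode, HTML-unescaped, and finally passed through the project's own sanitizeSceneName helper. A sketch of the first three steps on a made-up page fragment (sanitizeSceneName is omitted because it is specific to that codebase):

    import re
    from HTMLParser import HTMLParser  # html.parser.HTMLParser on Python 3
    from unidecode import unidecode

    page = '<option value="42">Le C\xc5\x93ur a ses raisons &amp; Co</option>'
    shows = dict(re.findall('<option value="(\d+)">(.*?)</option>', page))
    h = HTMLParser()
    for k, v in shows.items():
        # utf-8 bytes -> unicode -> ASCII -> HTML entities resolved
        shows[k] = h.unescape(unidecode(v.decode('utf-8')))
    # shows -> {'42': 'Le Coeur a ses raisons & Co'}
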
Example 21
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': '[\.\s]+'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                search_url = self.urls['browse'] % (self.user_authkey, self.user_passkey)
                if 'Cache' != mode:
                    search_url += self.urls['search'] % rc['nodots'].sub('+', search_string)

                data_json = self.get_url(search_url, json=True)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    for item in data_json.get('response', {}).get('results', []):
                        if self.freeleech and not item.get('isFreeleech'):
                            continue

                        seeders, leechers, group_name, torrent_id, size = [tryInt(n, n) for n in [item.get(x) for x in [
                            'seeders', 'leechers', 'groupName', 'torrentId', 'size']]]
                        if self._reject_item(seeders, leechers):
                            continue

                        try:
                            title_parts = group_name.split('[')
                            maybe_res = re.findall('((?:72|108|216)0\w)', title_parts[1])
                            maybe_ext = re.findall('(?i)(%s)' % '|'.join(common.mediaExtensions), title_parts[1])
                            detail = title_parts[1].split('/')
                            detail[1] = detail[1].strip().lower().replace('mkv', 'x264')
                            title = '%s.%s' % (BS4Parser(title_parts[0].strip(), 'html.parser').soup.string, '.'.join(
                                (maybe_res and [maybe_res[0]] or []) +
                                [detail[0].strip(), detail[1], maybe_ext and maybe_ext[0].lower() or 'mkv']))
                        except (IndexError, KeyError):
                            title = self.regulate_title(item, group_name)
                        download_url = self.urls['get'] % (self.user_authkey, self.user_passkey, torrent_id)

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except (StandardError, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example 22
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v))
                  for (k, v) in {'info': 'details', 'get': 'download', 'size': '(.*)\n.*'}.items())
        for mode in search_params.keys():
            rc['cats'] = re.compile('(?i)cat=(?:%s)' % self._categories_string(mode, template='', delimiter='|'))
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                html = self.get_url(self.urls['search'] % ('+'.join(search_string.split()),
                                                           self._categories_string(mode)))
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', id='torrents-table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, {'seed': r'(?:up\.png|seed|s/l)', 'leech': r'(?:down\.png|leech|peers)'})
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if not tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers):
                                    continue

                                title = tr.find('a', href=rc['info']).get_text().strip()
                                size = rc['size'].sub(r'\1', size)
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, IndexError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Example 23
    def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):

        logger.log(u"_doSearch started with ..." + str(search_params), logger.DEBUG)

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():

            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                
                searchURL = self.urls['search'] % (urllib.quote(search_string), self.categories)

                logger.log(u"Search string: " + searchURL, logger.DEBUG)
                                
                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                        result_linkz = html.findAll('a', href=re.compile("torrents-details"))
        
                        if not result_linkz:
                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                       logger.DEBUG)
                            continue
                        
                        if result_linkz:
                            for link in result_linkz:
                                title = link.text
                                logger.log(u"BLUETIGERS TITLE TEMP: " + title, logger.DEBUG)
                                download_url = self.urls['base_url'] + "/" + link['href']
                                download_url = download_url.replace("torrents-details", "download")
                                logger.log(u"BLUETIGERS downloadURL: " + download_url, logger.DEBUG)

                                if not title or not download_url:
                                    continue

                                item = title, download_url
                                logger.log(u"Found result: " + title.replace(' ','.') + " (" + download_url + ")", logger.DEBUG)

                                items[mode].append(item)

                except Exception, e:
                    logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

            results += items[mode]
Esempio n. 24
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail', 'get': 'download', 'filter': 'fa-(?:heart|star)'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (search_string, self._categories_string(mode, 'cats2[]=%s'))

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', class_='yenitorrenttable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, custom_tags=[('span', 'data-original-title')])
                                seeders, leechers, size = [n for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers, self.freeleech and (
                                        not tr.find('i', class_=rc['filter']))):
                                    continue

                                title = tr.find('a', href=rc['info']).get_text().strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 25
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'view', 'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (search_string, ('&freetorrent=1', '')[not self.freeleech])

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find(id='torrent_table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers, self.freeleech and (
                                        any([not tr.select('.tl_free'),
                                             tr.select('.tl_timed'), tr.select('[title^="Timed Free"]'),
                                             tr.select('.tl_expired'), tr.select('[title^="Expired Free"]')]))):
                                    continue

                                title = tr.find('a', title=rc['info']).get_text().strip()
                                download_url = self._link(tr.find('a', title=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 26
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_url = self.urls[('search', 'browse')['Cache' == mode]] % {
                    'cats': self._categories_string(mode, '', ','),
                    'query': isinstance(search_string, unicode) and unidecode(search_string) or search_string}

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find(id='torrenttable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers = [tryInt(n) for n in [
                                    tr.find('td', class_=x).get_text().strip() for x in 'seeders', 'leechers']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('td', class_='name').a
                                title = (info.attrs.get('title') or info.get_text()).strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 27
0
    def _search_provider(self, search_params, **kwargs):

        results = []

        if self.show and not self.show.is_anime:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'view', 'get': '(?:torrent|magnet:)'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % ((0, 2)[self.confirmed], search_string.replace('.', ' '))

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', class_='torrent-list')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                title = tr.find('a', href=rc['info']).get_text().strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, IndexError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 28
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'view', 'get': 'download', 'nodots': '[\.\s]+'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (
                    self._categories_string(mode, 'filter_cat[%s]=1'), rc['nodots'].sub('+', search_string))
                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html) or 'Translation: No search results' in html:
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find(id='torrent_table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._reject_item(seeders, leechers, self.freeleech and (not bool(
                                        re.search('(?i)>\s*Freeleech!*\s*<', cells[1].encode(formatter='minimal'))))):
                                    continue

                                title = self.regulate_title(tr.find('a', title=rc['info']).get_text().strip())
                                download_url = self._link(tr.find('a', title=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 29
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'list': '.*?torrent_all', 'info': 'details', 'key': 'key=([^"]+)">Torrent let'}.iteritems())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % search_string

                # fetches 15 results by default, and up to 100 if allowed in user profile
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('div', class_=rc['list'])
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('div', class_='box_torrent')
                        key = rc['key'].findall(html)[0]

                        if not len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows:
                            try:
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    tr.find('div', class_=x).get_text().strip()
                                    for x in 'box_s2', 'box_l2', 'box_meret2']]
                                if self._reject_item(seeders, leechers):
                                    continue

                                anchor = tr.find('a', href=rc['info'])
                                title = (anchor.get('title') or anchor.get_text()).strip()
                                download_url = self._link(anchor.get('href').replace('details', 'download')) + key
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 30
0
def slugify(text, separator="-"):
	ret = ""
	
	if text:
		for c in unidecode(text).lower():
			try:
				ret += htmlentitydefs.codepoint2name[ord(c)]
			except KeyError:
				ret += c

		ret = re.sub("([a-zA-Z])(uml|acute|grave|circ|tilde|cedil)", r"\1", ret)
		ret = re.sub("\W", " ", ret)
		ret = re.sub(" +", separator, ret)

	return ret.strip()
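
A brief usage sketch of the slugify helper above; the hypothetical inputs and outputs assume the usual unidecode transliteration, and because only surrounding whitespace is stripped, input that ends in punctuation can leave a trailing separator.

# Hedged usage sketch; exact output depends on the installed unidecode version.
print(slugify(u"Ünïcode Tëxt"))        # -> unicode-text
print(slugify(u"Crème brûlée", "_"))   # -> creme_brulee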
Esempio n. 31
0
    def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        if not self._doLogin():
            return results

        data = []
        searchURLS = []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                if mode == 'Season' and search_mode == 'sponly':
                    searchURLS += [self.urls['archive'] % (urllib.quote(search_string))]
                else:
                    searchURLS += [self.urls['search'] % (urllib.quote(search_string), self.categories)]
                    searchURLS += [self.urls['nonscene'] % (urllib.quote(search_string))]
                    searchURLS += [self.urls['foreign'] % (urllib.quote(search_string))]

                for searchURL in searchURLS:
                    logger.log(u"Search string: " + searchURL, logger.DEBUG)
                    try:
                        data += [x for x in [self.getURL(searchURL)] if x]
                    except Exception as e:
                        logger.log(u"Unable to fetch data reason: {0}".format(str(e)), logger.WARNING)

                if not len(data):
                    continue

            try:
                for dataItem in data:
                    with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
                        torrent_table = html.find('table', attrs={'id': 'torrents-table'})
                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []

                        #Continue only if at least one Release is found
                        if len(torrent_rows) < 2:
                            if html.title:
                                source = self.name + " (" + html.title.string + ")"
                            else:
                                source = self.name
                            logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
                            continue

                        for result in torrent_table.find_all('tr')[1:]:

                            try:
                                link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
                                all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
                                # Foreign section contains two links, the others only one
                                if self._isSection('Foreign', dataItem):
                                    url = all_urls[1]
                                else:
                                    url = all_urls[0]

                                title = link.string
                                if re.search('\.\.\.', title):
                                    data = self.getURL(self.url + "/" + link['href'])
                                    if data:
                                        with BS4Parser(data) as details_html:
                                            title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
                                download_url = self.urls['download'] % url['href']
                                id = int(link['href'].replace('details?id=', ''))
                                seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
                                leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
                            except (AttributeError, TypeError):
                                continue

                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                continue

                            if not title or not download_url:
                                continue

                            item = title, download_url, id, seeders, leechers
                            logger.log(u"Found result: " + title.replace(' ','.') + " (" + searchURL + ")", logger.DEBUG)

                            items[mode].append(item)

                # for each search mode sort all the items by seeders
                items[mode].sort(key=lambda tup: tup[3], reverse=True)
                results += items[mode]

            except Exception, e:
                logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
                continue
Esempio n. 32
0
    def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        freeleech = '&free=on' if self.freeleech else ''

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                searchURL = self.urls['search'] % (self.categorie, freeleech,
                                                   unidecode(search_string))
                searchURL += ';o=seeders' if mode != 'RSS' else ''

                logger.log(u"" + self.name + " search page URL: " + searchURL,
                           logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    data = re.sub(r'<button.+?<[\/]button>', '', data, 0,
                                  re.IGNORECASE | re.MULTILINE)
                    with BS4Parser(data, features=["html5lib",
                                                   "permissive"]) as html:
                        if not html:
                            logger.log(u"Invalid HTML data: " + str(data),
                                       logger.DEBUG)
                            continue

                        if html.find(text='No Torrents Found!'):
                            logger.log(
                                u"No results found for: " + search_string +
                                " (" + searchURL + ")", logger.DEBUG)
                            continue

                        torrent_table = html.find('table',
                                                  attrs={'class': 'torrents'})
                        torrents = torrent_table.find_all(
                            'tr') if torrent_table else []

                        #Continue only if one Release is found
                        if len(torrents) < 2:
                            logger.log(
                                u"The Data returned from " + self.name +
                                " do not contains any torrent", logger.WARNING)
                            continue

                        for result in torrents[1:]:

                            try:
                                torrent = result.find_all('td')[1].find('a')
                                torrent_name = torrent.string
                                torrent_download_url = self.urls['base_url'] + (
                                    result.find_all('td')[3].find('a'))['href']
                                torrent_details_url = self.urls[
                                    'base_url'] + torrent['href']
                                torrent_seeders = int(
                                    result.find('td',
                                                attrs={
                                                    'class': 'ac t_seeders'
                                                }).string)
                                ## Not used, perhaps in the future ##
                                #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
                                #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
                            except (AttributeError, TypeError):
                                continue

                            # Filter unseeded torrent and torrents with no name/url
                            if mode != 'RSS' and torrent_seeders == 0:
                                continue

                            if not torrent_name or not torrent_download_url:
                                continue

                            item = torrent_name, torrent_download_url
                            logger.log(
                                u"Found result: " + torrent_name + " (" +
                                torrent_details_url + ")", logger.DEBUG)
                            items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            results += items[mode]
Esempio n. 33
0
    def _doSearch(self, search_params, show=None, age=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if mode == 'RSS':
                    searchURL = self.url + 'index.php?page=torrents&active=1&category=%s' % (
                        ';'.join(self.categories[mode]))
                    logger.log(u"PublicHD cache update URL: " + searchURL,
                               logger.DEBUG)
                else:
                    searchURL = self.searchurl % (urllib.quote(
                        unidecode(search_string)), ';'.join(
                            self.categories[mode]))
                    logger.log(u"Search string: " + searchURL, logger.DEBUG)

                html = self.getURL(searchURL)
                if not html:
                    continue

                #remove unnecessary <option> lines which slow down BeautifulSoup
                optreg = re.compile(r'<option.*</option>')
                html = os.linesep.join(
                    [s for s in html.splitlines() if not optreg.search(s)])

                try:
                    soup = BeautifulSoup(html,
                                         features=["html5lib", "permissive"])

                    torrent_table = soup.find('table', attrs={'id': 'torrbg'})
                    torrent_rows = torrent_table.find_all(
                        'tr') if torrent_table else []

                    #Continue only if one Release is found
                    if len(torrent_rows) < 2:
                        logger.log(
                            u"The Data returned from " + self.name +
                            " do not contains any torrent", logger.DEBUG)
                        continue

                    for tr in torrent_rows[1:]:

                        try:
                            link = self.url + tr.find(href=re.compile(
                                'page=torrent-details'))['href']
                            title = tr.find(
                                lambda x: x.has_attr('title')).text.replace(
                                    '_', '.')
                            url = tr.find(href=re.compile('magnet+'))['href']
                            seeders = int(
                                tr.find_all('td', {'class': 'header'})[4].text)
                            leechers = int(
                                tr.find_all('td', {'class': 'header'})[5].text)
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and seeders == 0:
                            continue

                        if not title or not url:
                            continue

                        item = title, url, link, seeders, leechers

                        items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed to parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
Esempio n. 34
0
    def _doSearch(self, search_params, show=None, age=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if mode != 'RSS':
                    searchURL = self.searchurl % (urllib.quote(
                        unidecode(search_string)))
                    logger.log(u"Search string: " + searchURL, logger.DEBUG)
                else:
                    searchURL = self.url + 'tv/?field=time_add&sorder=desc'
                    logger.log(u"KAT cache update URL: " + searchURL,
                               logger.DEBUG)

                html = self.getURL(searchURL)
                if not html:
                    continue

                try:
                    soup = BeautifulSoup(html,
                                         features=["html5lib", "permissive"])

                    torrent_table = soup.find('table', attrs={'class': 'data'})
                    torrent_rows = torrent_table.find_all(
                        'tr') if torrent_table else []

                    #Continue only if one Release is found
                    if len(torrent_rows) < 2:
                        logger.log(
                            u"The Data returned from " + self.name +
                            " do not contains any torrent", logger.WARNING)
                        continue

                    for tr in torrent_rows[1:]:

                        try:
                            link = urlparse.urljoin(self.url, (tr.find(
                                'div', {
                                    'class': 'torrentname'
                                }).find_all('a')[1])['href'])
                            id = tr.get('id')[-7:]
                            title = (tr.find('div', {
                                'class': 'torrentname'
                            }).find_all('a')[1]).text
                            url = tr.find('a', 'imagnet')['href']
                            verified = True if tr.find('a',
                                                       'iverify') else False
                            trusted = True if tr.find(
                                'img', {'alt': 'verified'}) else False
                            seeders = int(tr.find_all('td')[-2].text)
                            leechers = int(tr.find_all('td')[-1].text)
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and seeders == 0:
                            continue

                        if sickbeard.KAT_VERIFIED and not verified:
                            logger.log(
                                u"KAT Provider found result " + title +
                                " but that doesn't seem like a verified result so I'm ignoring it",
                                logger.DEBUG)
                            continue

                        #Check that the number of video files matches the episodes in the season, and find the real quality of a full-season torrent by analyzing the files in the torrent
                        if mode == 'Season':
                            ep_number = int(
                                len(search_params['Episode']) /
                                len(set(allPossibleShowNames(self.show))))
                            title = self._find_season_quality(
                                title, link, ep_number)

                        if not title or not url:
                            continue

                        item = title, url, id, seeders, leechers

                        items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed to parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
Esempio n. 35
0
    def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        self.categories = "cat=" + str(self.cat)

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                last_page = 0
                y = int(self.page)

                if search_string == '':
                    continue

                search_string = str(search_string).replace('.', ' ')

                for x in range(0, y):

                    z = x * 20
                    if last_page:
                        break

                    logger.log(u"Page: " + str(x) + " of " + str(y),
                               logger.DEBUG)

                    if mode != 'RSS':
                        searchURL = (self.urls['search_page'] +
                                     '&filter={2}').format(
                                         z, self.categories, search_string)
                    else:
                        searchURL = self.urls['search_page'].format(
                            z, self.categories)

                    logger.log(u"Search string: " + searchURL, logger.DEBUG)

                    data = self.getURL(searchURL)
                    if not data:
                        logger.log(u"data is empty", logger.DEBUG)
                        continue

                    try:
                        with BS4Parser(data,
                                       features=["html5lib",
                                                 "permissive"]) as html:

                            torrent_table = html.find(
                                'table', attrs={'class': 'copyright'})
                            torrent_rows = torrent_table.find_all(
                                'tr') if torrent_table else []

                            #Continue only if one Release is found
                            logger.log(
                                u"Num of Row: " + str(len(torrent_rows)),
                                logger.DEBUG)

                            #                            if len(torrent_rows) == 0:
                            #
                            #                                self._uid = ""
                            #                                self._hash = ""
                            #                                self._session_id = ""
                            #
                            #                                if not self._doLogin():
                            #                                    return []
                            #
                            #                                continue

                            if len(torrent_rows) < 3:
                                logger.log(
                                    u"The Data returned from " + self.name +
                                    " do not contains any torrent",
                                    logger.DEBUG)
                                last_page = 1
                                continue

                            if len(torrent_rows) < 42:
                                last_page = 1

                            for result in torrent_table.find_all('tr')[2:]:

                                try:
                                    link = result.find('td').find('a')
                                    title = link.string
                                    id = ((result.find_all('td')[8].find('a')
                                           )['href'])[-8:]
                                    download_url = self.urls['download'] % (id)
                                    leechers = result.find_all(
                                        'td')[3].find_all('td')[1].text
                                    leechers = int(leechers.strip('[]'))
                                    seeders = result.find_all(
                                        'td')[3].find_all('td')[2].text
                                    seeders = int(seeders.strip('[]'))
                                except (AttributeError, TypeError):
                                    continue

                                if mode != 'RSS' and (
                                        seeders < self.minseed
                                        or leechers < self.minleech):
                                    continue

                                if not title or not download_url:
                                    continue

                                title = title.replace(" 720p", "").replace(
                                    " Versione 720p", "").replace(
                                        " Versione 1080p",
                                        "") + self._reverseQuality(
                                            self._episodeQuality(result))

                                item = title, download_url, id, seeders, leechers
                                logger.log(
                                    u"Found result: " + title + "(" +
                                    searchURL + ")", logger.DEBUG)

                                if not self._is_italian(
                                        result) and not self.subtitle:
                                    logger.log(u"Subtitled, Skipped",
                                               logger.DEBUG)
                                    continue
                                else:
                                    logger.log(
                                        u"Not Subtitled or Forced, Got It!",
                                        logger.DEBUG)

                                items[mode].append(item)

                    except Exception, e:
                        logger.log(
                            u"Failed parsing " + self.name + " Traceback: " +
                            traceback.format_exc(), logger.ERROR)

                #For each search mode sort all the items by seeders
                items[mode].sort(key=lambda tup: tup[3], reverse=True)

                results += items[mode]
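
The example above pages through results in fixed offsets of 20 and sets a last_page flag when a short page signals the end. A condensed standalone sketch of that offset-paging loop follows; fetch_page and page_size are illustrative names, not part of the provider code.

def iter_pages(fetch_page, max_pages, page_size=20):
    # Yield one page of rows at a time; a short (or empty) page means the
    # site has no further results, mirroring the last_page flag above.
    for page in range(max_pages):
        rows = fetch_page(offset=page * page_size)
        if not rows:
            break
        yield rows
        if len(rows) < page_size:
            break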
Esempio n. 36
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'list': '.*?torrent_all',
                'info': 'details',
                'key': 'key=([^"]+)">Torrent let'
            }.iteritems())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % search_string

                # fetches 15 results by default, and up to 100 if allowed in user profile
                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find('div', class_=rc['list'])
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'div', class_='box_torrent')
                        key = rc['key'].findall(html)[0]

                        if not len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows:
                            try:
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        tr.find('div',
                                                class_=x).get_text().strip()
                                        for x in 'box_s2', 'box_l2',
                                        'box_meret2'
                                    ]
                                ]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                anchor = tr.find('a', href=rc['info'])
                                title = (anchor.get('title')
                                         or anchor.get_text()).strip()
                                download_url = self._link(
                                    anchor.get('href').replace(
                                        'details', 'download')) + key
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
Esempio n. 37
0
    def html(self, mode, search_string, results):

        if 'Content-Type' in self.session.headers:
            del (self.session.headers['Content-Type'])
        setattr(
            self.session, 'reserved', {
                'headers': {
                    'Accept': 'text/html, application/xhtml+xml, */*',
                    'Accept-Language': 'en-GB',
                    'Cache-Control': 'no-cache',
                    'Referer': 'https://broadcasthe.net/login.php',
                    'User-Agent': self.ua
                }
            })
        self.headers = None

        if self.auth_html or self._authorised_html():
            del (self.session.reserved['headers']['Referer'])
            if 'Referer' in self.session.headers:
                del (self.session.headers['Referer'])
            self.auth_html = True

            search_string = isinstance(
                search_string,
                unicode) and unidecode(search_string) or search_string
            search_url = self.urls['search'] % (search_string,
                                                self._categories_string(
                                                    mode, 'filter_cat[%s]=1'))

            html = self.get_url(search_url, use_tmr_limit=False)
            if self.should_skip(log_warning=False, use_tmr_limit=False):
                return results

            cnt = len(results)
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html) as soup:
                    tbl = soup.find(id='torrent_table')
                    tbl_rows = [] if not tbl else tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    rc = dict(
                        (k, re.compile('(?i)' + v)) for (k, v) in {
                            'cats':
                            r'(?i)cat\[(?:%s)\]' % self._categories_string(
                                mode, template='', delimiter='|'),
                            'get':
                            'download'
                        }.items())

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 5 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(
                                tr)
                            seeders, leechers, size = [
                                tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip()
                                    for x in 'seed', 'leech', 'size'
                                ]
                            ]
                            if not tr.find(
                                    'a', href=rc['cats']) or self._reject_item(
                                        seeders,
                                        leechers,
                                        container=self.reject_m2ts and
                                        (re.search(r'(?i)\[.*?m2?ts.*?\]',
                                                   tr.get_text('',
                                                               strip=True)))):
                                continue

                            title = tr.select('td span[title]')[0].attrs.get(
                                'title').strip()
                            download_url = self._link(
                                tr.find('a', href=rc['get'])['href'])
                        except (AttributeError, TypeError, ValueError,
                                KeyError, IndexError):
                            continue

                        if title and download_url:
                            results.append((title, download_url, seeders,
                                            self._bytesizer(size)))
Esempio n. 38
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download',
            'fl': 'free'
        }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_string = re.sub('(?i)[^a-z0-9\s]', '%',
                                       unquote_plus(search_string))

                kwargs = dict(
                    post_data={
                        'keywords': search_string,
                        'do': 'quick_sort',
                        'page': '0',
                        'category': '0',
                        'search_type': 't_name',
                        'sort': 'added',
                        'order': 'desc',
                        'daysprune': '-1'
                    })

                vals = [i for i in range(5, 16)]
                random.SystemRandom().shuffle(vals)
                attempts = html = soup = torrent_table = None
                fetch = 'failed fetch'
                for attempts, s in enumerate((0, vals[0], vals[5], vals[10])):
                    time.sleep(s)
                    html = self.get_url(self.urls['search'], **kwargs)
                    if self.should_skip():
                        return results
                    if html:
                        soup = BeautifulSoup(html, 'html.parser')
                        torrent_table = soup.find('table', id='sortabletable')
                        if torrent_table:
                            fetch = 'data fetched'
                            break
                if attempts:
                    logger.log('%s %s after %s attempts' %
                               (mode, fetch, attempts + 1))

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(
                            html) or not torrent_table:
                        raise generic.HaltParseException

                    torrent_rows = torrent_table.find_all('tr')
                    get_detail = True

                    if 2 > len(torrent_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in torrent_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(
                                tr)
                            seeders, leechers, size = [
                                tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip()
                                    for x in 'seed', 'leech', 'size'
                                ]
                            ]
                            if self._reject_item(
                                    seeders, leechers, self.freeleech
                                    and (None is cells[1].find(
                                        'img', title=rc['fl']))):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = (tr.find(
                                'div', class_='tooltip-content').get_text()
                                     or info.get_text()).strip()
                            title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            download_url = self._link(
                                tr.find('a', href=rc['get'])['href'])
                        except (StandardError, Exception):
                            continue

                        if get_detail and title.endswith('...'):
                            try:
                                with BS4Parser(
                                        self.get_url('%s%s' % (
                                            self.
                                            urls['config_provider_home_uri'],
                                            info['href'].lstrip('/').replace(
                                                self.urls[
                                                    'config_provider_home_uri'],
                                                ''))),
                                        'html.parser') as soup_detail:
                                    title = soup_detail.find(
                                        'td',
                                        class_='thead',
                                        attrs={
                                            'colspan': '3'
                                        }).get_text().strip()
                                    title = re.findall('(?m)(^[^\r\n]+)',
                                                       title)[0]
                            except IndexError:
                                continue
                            except (StandardError, Exception):
                                get_detail = False

                        try:
                            titles = self.regulate_title(
                                title, mode, search_string)
                            if download_url and titles:
                                for title in titles:
                                    items[mode].append(
                                        (title, download_url, seeders,
                                         self._bytesizer(size)))
                        except (StandardError, Exception):
                            pass

                except generic.HaltParseException:
                    pass
Esempio n. 39
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'info': 'details',
                'get': 'dl.php',
                'snatch': 'snatches',
                'seeders': r'(^\d+)',
                'leechers': r'(\d+)$'
            }.items())
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter
                        if f not in self.filter])[non_marked]
            rc['filter'] = re.compile('(?i)(%s)' % '|'.join([
                self.may_filter[f][2] for f in filters if self.may_filter[f][1]
            ]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join(
                [self.may_filter[f][0] for f in filters]))
        for mode in search_params.keys():
            rc['cats'] = re.compile(
                '(?i)cat=(?:%s)' %
                self._categories_string(mode, template='', delimiter='|'))
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                search_url = self.urls['search'] % ('+'.join(
                    search_string.split()), self._categories_string(mode))
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                time.sleep(2)
                if not self.has_all_cookies(['session_key']):
                    if not self._authorised():
                        return results
                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find('table', id='tortable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            if any(self.filter):
                                marker = ''
                                try:
                                    marker = tr.select('a[href^="browse"] .tip'
                                                       )[0].get_text().strip()
                                except (StandardError, Exception):
                                    pass
                                if ((non_marked
                                     and rc['filter'].search(marker)) or
                                    (not non_marked
                                     and not rc['filter'].search(marker))):
                                    continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers = 2 * [
                                    cells[head['seed']].get_text().strip()
                                ]
                                seeders, leechers = [
                                    tryInt(n) for n in [
                                        rc['seeders'].findall(seeders)[0],
                                        rc['leechers'].findall(leechers)[0]
                                    ]
                                ]
                                if self._peers_fail(mode, seeders, leechers) or\
                                        not rc['cats'].findall(tr.find('td').get('onclick', ''))[0]:
                                    continue

                                title = tr.find(
                                    'a', href=rc['info']).get_text().strip()
                                snatches = tr.find(
                                    'a', href=rc['snatch']).get_text().strip()
                                size = cells[
                                    head['size']].get_text().strip().replace(
                                        snatches, '')
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 log + self.session.response.get('url'))

            results = self._sort_seeding(mode, results + items[mode])

        return results
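
In Esempio n. 39 the seeders and leechers come from a single table cell: the cell text is duplicated with 2 * [...] and then the regexes (^\d+) and (\d+)$ pull the leading and trailing numbers respectively. A small sketch of that parsing step, with the cell text hard-coded as an assumption since the real markup is not shown:

import re

seeders_re = re.compile(r'(?i)(^\d+)')
leechers_re = re.compile(r'(?i)(\d+)$')

# hypothetical text of the 'seed' column cell
cell_text = '57 / 3'

seeders, leechers = [int(rx.findall(cell_text)[0])
                     for rx in (seeders_re, leechers_re)]
print(seeders, leechers)  # -> 57 3
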
Esempio n. 40
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download'
        }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_url = self.urls['browse'] % (
                    self._categories_string(), ('3', '0')[not self.freeleech],
                    (self.urls['search'] % search_string, '')['Cache' == mode])

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = html.replace(
                        '<?xml version="1.0" encoding="iso-8859-1"?>', '')
                    html = re.sub(r'(</td>)[^<]*</td>', r'\1', html)
                    html = re.sub(r'(<a[^<]*)<a[^<]*?href=details[^<]*', r'\1',
                                  html)
                    with BS4Parser(html, 'html.parser') as soup:
                        shows_found = False
                        torrent_rows = soup.find_all('tr')
                        for index, row in enumerate(torrent_rows):
                            if 'type' == row.find_all(
                                    'td')[0].get_text().strip().lower():
                                shows_found = index
                                break

                        if not shows_found or 2 > (len(torrent_rows) -
                                                   shows_found):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1 + shows_found:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    torrent_rows[shows_found])
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in 'seed', 'leech', 'size'
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
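
Esempio n. 40 assembles its browse URL with the (a, b)[condition] idiom: indexing a two-element tuple with a boolean picks the second element when the condition is true, so ('3', '0')[not self.freeleech] and (search, '')['Cache' == mode] act as compact conditionals. A stand-alone illustration with made-up values standing in for self.freeleech and the current search mode:

# hypothetical flags standing in for self.freeleech and the current search mode
freeleech = True
mode = 'Cache'

# False indexes element 0, True indexes element 1
freeleech_flag = ('3', '0')[not freeleech]        # -> '3' when freeleech is enabled
search_part = ('some+show', '')['Cache' == mode]  # -> '' in cache mode

print(freeleech_flag, repr(search_part))  # -> 3 ''
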
Esempio n. 41
0
    def _search_provider(self, search_params, search_mode='eponly', epcount=0, **kwargs):

        results = []
        if not self.url:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*',
            'verify': '(?:helper|moderator|trusted|vip)', 'size': 'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'}.items())

        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string

                search_url = self.urls['browse'] if 'Cache' == mode \
                    else self.urls['search'] % (urllib.quote(search_string))
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        self._url = None
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="searchResult"') as soup:
                        torrent_table = soup.find(id='searchResult')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_table.find_all('tr')[1:]:
                            cells = tr.find_all('td')
                            if 3 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers = [tryInt(cells[head[x]].get_text().strip()) for x in 'seed', 'leech']
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', title=rc['info'])
                                title = info.get_text().strip().replace('_', '.')
                                tid = rc['tid'].sub(r'\1', str(info['href']))
                                download_magnet = tr.find('a', title=rc['get'])['href']
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if self.confirmed and not tr.find('img', title=rc['verify']):
                                logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG)
                                continue

                            # Check that the number of video files matches the episode count for the season
                            # and find the real quality of a full-season torrent by analysing its file list
                            if 'Season' == mode and 'sponly' == search_mode:
                                ep_number = int(epcount / len(set(show_name_helpers.allPossibleShowNames(self.show))))
                                title = self._find_season_quality(title, tid, ep_number)

                            if title and download_magnet:
                                size = None
                                try:
                                    size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0]
                                except (StandardError, Exception):
                                    pass

                                items[mode].append((title, download_magnet, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
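
Esempio n. 41 pulls a human-readable size with the regex size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+) and passes it to self._bytesizer, a helper that is not shown in these examples. The sketch below is only an assumed approximation of such a size-to-bytes conversion, not the provider's actual implementation:

import re

def approx_bytesizer(size_str):
    # assumed behaviour: turn strings like '1.4 GiB' or '700 MB' into a byte count
    match = re.search(r'(?i)([\d.,]+)\s*([bkmgt])', size_str or '')
    if not match:
        return -1
    value = float(match.group(1).replace(',', '.'))
    multiplier = 1024 ** 'bkmgt'.index(match.group(2).lower())
    return int(value * multiplier)

print(approx_bytesizer('1.4 GiB'))  # -> 1503238553
print(approx_bytesizer('700 MB'))   # -> 734003200
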
Esempio n. 42
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'info':
                'detail',
                'get':
                '.*id=(\d+).*',
                'fl':
                '\[freeleech\]',
                'cats':
                'cat=(?:%s)' %
                self._categories_string(template='', delimiter='|')
            }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                post_data = self.urls['params'].copy()
                post_data.update(
                    ast.literal_eval('{%s}' % self._categories_string(
                        template='"c%s": "1"', delimiter=',')))
                if 'Cache' != mode:
                    search_string = '+'.join(search_string.split())
                    post_data['search'] = search_string

                if self.freeleech:
                    post_data.update({'freeleech': 'on'})

                self.session.headers.update({
                    'Referer':
                    self.url + 'browse.php',
                    'X-Requested-With':
                    'XMLHttpRequest'
                })
                html = self.get_url(self.urls['browse'], post_data=post_data)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find('table',
                                                  attrs={'cellpadding': 5})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        tr.find_all('td')
                                        [x].get_text().strip()
                                        for x in (-2, -1, -3)
                                    ]
                                ]
                                if None is tr.find('a', href=rc['cats'])\
                                        or self.freeleech and None is rc['fl'].search(tr.find_all('td')[1].get_text())\
                                        or self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = 'title' in info.attrs and info.attrs[
                                    'title'] or info.get_text().strip()

                                download_url = self.urls['get'] % {
                                    'id':
                                    re.sub(rc['get'], r'\1',
                                           str(info.attrs['href'])),
                                    'title':
                                    str(title).replace(' ', '.')
                                }
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 ('search string: ' + search_string,
                                  self.name)['Cache' == mode])

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
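
In Esempio n. 42 the per-category POST fields are produced by rendering one '"c%s": "1"' fragment per category id and parsing the joined result back into a dict with ast.literal_eval. A self-contained sketch of that round trip, using hypothetical category ids in place of self._categories_string:

import ast

# hypothetical category ids; the provider derives these from its configuration
category_ids = [4, 7, 26]

# render '"c<id>": "1"' per category and join with commas, as the template/delimiter call does
rendered = ', '.join('"c%s": "1"' % cat_id for cat_id in category_ids)

post_data = {'search': 'some show'}
post_data.update(ast.literal_eval('{%s}' % rendered))
print(post_data)  # -> {'search': 'some show', 'c4': '1', 'c7': '1', 'c26': '1'}
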
Esempio n. 43
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download',
            'fl': 'free'
        }.items())
        for mode in search_params.keys():
            save_url, restore = self._set_categories(mode)
            if self.should_skip():
                return results
            for search_string in search_params[mode]:
                search_string = search_string.replace(u'£', '%')
                search_string = re.sub(r'[\s.]+', '%', search_string)
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                kwargs = dict(
                    post_data={
                        'keywords': search_string,
                        'do': 'quick_sort',
                        'page': '0',
                        'category': '0',
                        'search_type': 't_name',
                        'sort': 'added',
                        'order': 'desc',
                        'daysprune': '-1'
                    })

                html = self.get_url(self.urls['search'], **kwargs)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    parse_only = dict(table={
                        'id': (lambda at: at and 'sortabletable' in at)
                    })
                    with BS4Parser(html, parse_only=parse_only) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')
                        get_detail = True

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in 'seed', 'leech', 'size'
                                    ]
                                ]
                                if self._reject_item(
                                        seeders, leechers, self.freeleech
                                        and (None is cells[1].find(
                                            'img', title=rc['fl']))):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (tr.find(
                                    'div',
                                    class_='tooltip-content').get_text()
                                         or info.get_text()).strip()
                                title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (BaseException, Exception):
                                continue

                            if get_detail and title.endswith('...'):
                                try:
                                    with BS4Parser(
                                            self.get_url('%s%s' % (
                                                self.urls[
                                                    'config_provider_home_uri'],
                                                info['href'].lstrip('/').
                                                replace(
                                                    self.urls[
                                                        'config_provider_home_uri'],
                                                    '')))) as soup_detail:
                                        title = soup_detail.find(
                                            'td',
                                            class_='thead',
                                            attrs={
                                                'colspan': '3'
                                            }).get_text().strip()
                                        title = re.findall(
                                            '(?m)(^[^\r\n]+)', title)[0]
                                except IndexError:
                                    continue
                                except (BaseException, Exception):
                                    get_detail = False

                            title = self.regulate_title(title)
                            if download_url and title:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
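
Esempio n. 43, like n. 39 and n. 41, calls self._header_row(tr) once on the header row and afterwards indexes data cells by name, e.g. cells[head['seed']]. That helper is not included in these snippets, so the following is only a guess at the general idea: map recognisable header labels to their column positions.

import re

def assumed_header_row(header_cells):
    # assumed sketch: map 'seed', 'leech' and 'size' to the matching column index
    patterns = {'seed': r'(?i)seed', 'leech': r'(?i)leech', 'size': r'(?i)size'}
    head = {}
    for index, text in enumerate(header_cells):
        for name, pattern in patterns.items():
            if name not in head and re.search(pattern, text):
                head[name] = index
    return head

header = ['Name', 'Size', 'Seeders', 'Leechers', 'Added']
print(assumed_header_row(header))  # -> {'size': 1, 'seed': 2, 'leech': 3}
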
Esempio n. 44
0
    def _doSearch(self, search_params):

        results = []
        items = {'Season': [], 'Episode': []}

        freeleech = '&free=on' if sickbeard.IPTORRENTS_FREELEECH else ''

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                searchURL = self.urls['search'] % (
                    self.categorie, freeleech,
                    unidecode(search_string)) + ';o=seeders'

                logger.log(u"" + self.name + " search page URL: " + searchURL,
                           logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    html = BeautifulSoup(data,
                                         features=["html5lib", "permissive"])

                    if not html:
                        logger.log(u"Invalid HTML data: " + str(data),
                                   logger.DEBUG)
                        continue

                    if html.find(text='Nothing found!'):
                        logger.log(
                            u"No results found for: " + search_string + " (" +
                            searchURL + ")", logger.DEBUG)
                        continue

                    torrent_table = html.find('table',
                                              attrs={'class': 'torrents'})
                    torrents = torrent_table.find_all(
                        'tr') if torrent_table else []

                    if not torrents:
                        #                        logger.log(u"The data returned from " + self.name + " is incomplete, this result is unusable", logger.DEBUG)
                        continue

                    for result in torrents[1:]:

                        torrent = result.find_all('td')[1].find('a')

                        torrent_name = torrent.string
                        torrent_download_url = self.urls['base_url'] + (
                            result.find_all('td')[3].find('a'))['href']
                        torrent_details_url = self.urls['base_url'] + torrent[
                            'href']
                        torrent_seeders = int(
                            result.find('td', attrs={
                                'class': 'ac t_seeders'
                            }).string)

                        ## Not used, perhaps in the future ##
                        #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
                        #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)

                        # Filter out unseeded torrents and torrents with no name/URL
                        if torrent_seeders == 0 or not torrent_name or not torrent_download_url:
                            continue

                        item = torrent_name, torrent_download_url
                        logger.log(
                            u"Found result: " + torrent_name + " (" +
                            torrent_details_url + ")", logger.DEBUG)
                        items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed parsing " + self.name +
                        (" Exceptions: " + str(e) if e else ''), logger.ERROR)

            results += items[mode]
Esempio n. 45
0
    def _doSearch(self,
                  search_params,
                  search_mode='eponly',
                  epcount=0,
                  age=0,
                  epObj=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        if not self._doLogin():
            return results

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                searchURL = self.urls['search'] % (search_string,
                                                   self.catagories)

                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    with BS4Parser(data, features=["html5lib",
                                                   "permissive"]) as html:
                        torrent_table = html.find(
                            'table', attrs={'id': 'torrent_table'})
                        torrent_rows = torrent_table.find_all(
                            'tr') if torrent_table else []

                        # Continue only if at least one release was found
                        if len(torrent_rows) < 2:
                            logger.log(
                                u"The Data returned from " + self.name +
                                " does not contain any torrents", logger.DEBUG)
                            continue

                        for result in torrent_rows[1:]:
                            cells = result.find_all('td')
                            link = result.find('a', attrs={'dir': 'ltr'})
                            url = result.find('a', attrs={'title': 'Download'})

                            try:
                                title = link.contents[0]
                                download_url = self.urls['download'] % (
                                    url['href'])
                                id = link['href'][-6:]
                                seeders = cells[len(cells) - 2].contents[0]
                                leechers = cells[len(cells) - 1].contents[0]
                            except (AttributeError, TypeError):
                                continue

                            # Filter out unseeded torrents
                            if mode != 'RSS' and (seeders < self.minseed
                                                  or leechers < self.minleech):
                                continue

                            if not title or not download_url:
                                continue

                            item = title, download_url, id, seeders, leechers
                            logger.log(
                                u"Found result: " + title + "(" + searchURL +
                                ")", logger.DEBUG)

                            items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
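
Esempio n. 45 (and similarly n. 47 and n. 50) closes each mode by sorting the collected tuples on their seeder field before extending results. A minimal stand-alone version with hypothetical result tuples:

# hypothetical (title, download_url, id, seeders, leechers) tuples
found = [
    ('Show.S01E01.720p', 'http://example.invalid/1', '111111', 4, 2),
    ('Show.S01E01.1080p', 'http://example.invalid/2', '222222', 25, 9),
    ('Show.S01E01.480p', 'http://example.invalid/3', '333333', 11, 3),
]

# sort descending by the seeders field (index 3), as the provider does
found.sort(key=lambda tup: tup[3], reverse=True)
print([title for title, _, _, _, _ in found])  # best-seeded first
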
Esempio n. 46
0
    def _search_provider(self, search_params, **kwargs):

        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'abd': '(\d{4}(?:[.]\d{2}){2})',
                'peers': 'Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)',
                'info': '(\w+)[.]html'
            }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_string = '+'.join(rc['abd'].sub(r'%22\1%22',
                                                       search_string).split())
                search_url = self.urls['search'] % (
                    search_string, self._categories_string(mode, '', ','))

                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException
                    html = html.replace('</a> </i>', '</a>').replace(
                        '"href=', '" href=').replace('"style', '" style')
                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find('table',
                                                  class_='table-torrents')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, {
                                        'peers': r'(?:zqf\-cloud)',
                                        'size': r'(?:zqf\-files)'
                                    })
                                stats = rc['peers'].findall(
                                    (cells[head['peers']].find(
                                        class_='progress')
                                     or {}).get('title', ''))
                                seeders, leechers = any(stats) and [
                                    tryInt(x) for x in stats[0]
                                ] or (0, 0)
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = cells[1].find(
                                    'a', href=rc['info']) or cells[0].find(
                                        'a', href=rc['info'])
                                title = info and info.get_text().strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = info and (
                                    self.urls['get'] %
                                    rc['info'].findall(info['href'])[0])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
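
Esempio n. 46 reads seeders and leechers out of a progress bar's title attribute with a single regex that captures both numbers. A short illustration with a hypothetical tooltip string, since the real page markup is not shown:

import re

peers_re = re.compile(r'(?i)Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)')

# hypothetical tooltip text from the peers progress bar
tooltip = 'Seeders: 42 | Leechers: 7'

stats = peers_re.findall(tooltip)
seeders, leechers = [int(x) for x in stats[0]] if stats else (0, 0)
print(seeders, leechers)  # -> 42 7
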
Esempio n. 47
0
    def _doSearch(self,
                  search_params,
                  search_mode='eponly',
                  epcount=0,
                  age=0,
                  epObj=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        for mode in search_params.keys():
            for search_string in search_params[mode]:
                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                if mode != 'RSS':
                    searchURL = self.url + 'usearch/%s/?field=seeders&sorder=desc&rss=1' % urllib.quote_plus(
                        search_string)
                else:
                    searchURL = self.url + 'tv/?field=time_add&sorder=desc&rss=1'

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                try:
                    entries = self.cache.getRSSFeed(searchURL)['entries']
                    for item in entries or []:
                        try:
                            link = item['link']
                            id = item['guid']
                            title = item['title']
                            url = item['torrent_magneturi']
                            verified = bool(int(item['torrent_verified']) or 0)
                            seeders = int(item['torrent_seeds'])
                            leechers = int(item['torrent_peers'])
                            size = int(item['torrent_contentlength'])
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and (seeders < self.minseed
                                              or leechers < self.minleech):
                            continue

                        if self.confirmed and not verified:
                            logger.log(
                                u"KAT Provider found result " + title +
                                " but that doesn't seem like a verified result so I'm ignoring it",
                                logger.DEBUG)
                            continue

                        # Check that the number of video files matches the episode count for the season and find the real quality of a full-season torrent by analysing its file list
                        if mode == 'Season' and search_mode == 'sponly':
                            ep_number = int(
                                epcount /
                                len(set(allPossibleShowNames(self.show))))
                            title = self._find_season_quality(
                                title, link, ep_number)

                        if not title or not url:
                            continue

                        try:
                            pubdate = datetime.datetime(
                                *item['published_parsed'][0:6])
                        except AttributeError:
                            try:
                                pubdate = datetime.datetime(
                                    *item['updated_parsed'][0:6])
                            except AttributeError:
                                try:
                                    pubdate = datetime.datetime(
                                        *item['created_parsed'][0:6])
                                except AttributeError:
                                    try:
                                        pubdate = datetime.datetime(
                                            *item['date'][0:6])
                                    except AttributeError:
                                        pubdate = datetime.datetime.today()

                        item = title, url, id, seeders, leechers, size, pubdate

                        items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed to parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]
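
The nested try/except ladder in Esempio n. 47 simply tries several feed date fields in order and falls back to the current time. The same intent can be expressed with a loop over the candidate fields; this sketch assumes a feedparser-style entry dict and is not the provider's own code:

import datetime

def entry_pubdate(entry):
    # try the usual feed date fields in order, defaulting to now
    for field in ('published_parsed', 'updated_parsed', 'created_parsed', 'date'):
        parsed = entry.get(field)
        if parsed:
            return datetime.datetime(*parsed[0:6])
    return datetime.datetime.today()

# hypothetical entry where only 'updated_parsed' is available
entry = {'updated_parsed': (2015, 6, 1, 12, 30, 0, 0, 152, 0)}
print(entry_pubdate(entry))  # -> 2015-06-01 12:30:00
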
Esempio n. 48
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v))
                  for (k, v) in {'get': 'download'}.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_url = self.urls[
                    ('search', 'browse')['Cache' == mode]] % {
                        'cats':
                        self._categories_string(mode, '', ','),
                        'query':
                        isinstance(search_string, unicode)
                        and unidecode(search_string) or search_string
                    }

                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find(id='torrenttable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers = [
                                    tryInt(n) for n in [
                                        tr.find('td',
                                                class_=x).get_text().strip()
                                        for x in 'seeders', 'leechers'
                                    ]
                                ]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('td', class_='name').a
                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
Esempio n. 49
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'details',
            'get': 'download'
        }.items())
        for mode in search_params.keys():
            rc['cats'] = re.compile(
                '(?i)cat=(?:%s)' %
                self._categories_string(mode, template='', delimiter='|'))
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                html = self.get_url(
                    self.urls['search'] %
                    ('+'.join(search_string.split()),
                     self._categories_string(mode, template='cats[]=%s')))

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_rows = soup.find_all('div', 'torrentrow')

                        if not len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows:
                            cells = tr.select('span[style*="cell"]')
                            if 6 > len(cells):
                                continue
                            try:
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        cells[x].get_text().strip()
                                        for x in -3, -2, -5
                                    ]
                                ]
                                if self._peers_fail(mode, seeders,
                                                    leechers) or not tr.find(
                                                        'a', href=rc['cats']):
                                    continue

                                title = tr.find(
                                    'a', href=rc['info']).get_text().strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
Esempio n. 50
0
    def _do_search(self,
                   search_params,
                   search_mode='eponly',
                   epcount=0,
                   age=0):

        results = []
        if not self._do_login():
            return results

        items = {'Season': [], 'Episode': [], 'Cache': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download'
        }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                search_url = self.urls['search'] % (search_string,
                                                    self.categories)
                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, 'html.parser') as soup:
                        torrent_table = soup.find('table',
                                                  attrs={'class': 'koptekst'})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers = [
                                    int(
                                        tr.find_all('td')
                                        [x].get_text().strip())
                                    for x in (-3, -2)
                                ]
                                if 'Cache' != mode and (
                                        seeders < self.minseed
                                        or leechers < self.minleech):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = info.get_text().strip()

                                download_url = self.urls['get'] % str(
                                    tr.find(
                                        'a',
                                        href=rc['get'])['href']).lstrip('/')
                            except (AttributeError, TypeError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders))

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_result(mode, len(items[mode]) - cnt, search_url)

            # for each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[2], reverse=True)

            results += items[mode]

        return results
Esempio n. 51
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v))
                  for (k, v) in {'info': '.*?details\s*-\s*', 'get': 'download'}.items())
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter if f not in self.filter])[non_marked]
            filters += (((all([x in filters for x in 'free', 'double']) and ['freedouble'] or [])
                        + (all([x in filters for x in 'half', 'double']) and ['halfdouble'] or [])),
                        ((not all([x not in filters for x in 'free', 'double']) and ['freedouble'] or [])
                         + (not all([x not in filters for x in 'half', 'double']) and ['halfdouble'] or []))
                        )[non_marked]
            rc['filter'] = re.compile('(?i)^(%s)$' % '|'.join(
                ['%s' % f for f in filters if (f in self.may_filter and self.may_filter[f][1]) or f]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join(
                [f in self.may_filter and self.may_filter[f][0] or f for f in filters]))
        for mode in search_params.keys():
            if mode in ['Season', 'Episode']:
                show_type = self.show.air_by_date and 'Air By Date' \
                    or self.show.is_sports and 'Sports' or self.show.is_anime and 'Anime' or None
                if show_type:
                    logger.log(u'Provider does not carry shows of type: [%s], skipping' % show_type, logger.DEBUG)
                    return results

            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (
                    '+'.join(search_string.split()), self._categories_string(mode, ''))

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('table', class_='table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells) or (self.confirmed and tr.find('i', title=re.compile('(?i)unverified'))):
                                continue
                            if any(self.filter):
                                marked = ','.join([x.attrs.get('title', '').lower() for x in tr.find_all(
                                    'i', attrs={'class': ['fa-star', 'fa-diamond', 'fa-star-half-o']})])
                                munged = ''.join(filter(marked.__contains__, ['free', 'half', 'double']))
                                if ((non_marked and rc['filter'].search(munged)) or
                                        (not non_marked and not rc['filter'].search(munged))):
                                    continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                title = rc['info'].sub('', tr.find('a', attrs={'title': rc['info']})['title'])
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, IndexError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
Esempio n. 52
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download',
                                                             'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|')
                                                             }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (self._categories_string(mode), search_string)

                html = self.get_url(search_url, timeout=self.url_timeout)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        torrent_table = soup.find('td', attrs={'class': 'colhead'}).find_parent('table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                info = tr.find('a', href=rc['info'])
                                if not info:
                                    continue

                                seeders, leechers, size = [tryInt(n, n) for n in [
                                    (tr.find_all('td')[x].get_text().strip()) for x in (-2, -1, -4)]]
                                if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers):
                                    continue

                                title = 'title' in info.attrs and info.attrs['title'] or info.get_text().strip()
                                download_url = self.urls['get'] % tr.find('a', href=rc['get']).get('href')

                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except (generic.HaltParseException, AttributeError):
                    pass
                except Exception:
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
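
Esempio n. 52 (and n. 42) merges each mode with list(set(results + items[mode])), which deduplicates only because every item is a plain tuple and therefore hashable. A tiny demonstration with hypothetical duplicate entries; note that set() also discards ordering, so any prior sort by seeders is not preserved in the merged list:

results = [('Show.S01E01', 'http://example.invalid/dl/1', 10, 700000000)]
new_items = [
    ('Show.S01E01', 'http://example.invalid/dl/1', 10, 700000000),  # exact duplicate
    ('Show.S01E02', 'http://example.invalid/dl/2', 8, 650000000),
]

merged = list(set(results + new_items))
print(len(merged))  # -> 2, the exact duplicate collapses
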
Esempio n. 53
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'info': 'torrent-details',
                'get': 'download',
                'peers': 'page=peers',
                'nodots': '[\.\s]+'
            }.items())
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter
                        if f not in self.filter])[non_marked]
            rc['filter'] = re.compile('(?i)(%s).png' % '|'.join([
                self.may_filter[f][2] for f in filters if self.may_filter[f][1]
            ]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join(
                [self.may_filter[f][0] for f in filters]))
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                search_url = self.urls['browse'] + self._categories_string(
                    template='', delimiter=';')
                if 'Cache' != mode:
                    search_url += self.urls['search'] % rc['nodots'].sub(
                        ' ', search_string)

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html,
                                   features=['html5lib', 'permissive'],
                                   attr='width="100%"\Wclass="lista"') as soup:
                        torrent_table = soup.find_all('table',
                                                      class_='lista')[-1]
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if (6 > len(cells)
                                    or tr.find('td', class_='header') or
                                (any(self.filter) and
                                 ((non_marked
                                   and tr.find('img', src=rc['filter'])) or
                                  (not non_marked and
                                   not tr.find('img', src=rc['filter']))))):
                                continue
                            downlink = tr.find('a', href=rc['get'])
                            if None is downlink:
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers = [
                                    tryInt(x.get_text().strip())
                                    for x in tr.find_all('a', href=rc['peers'])
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = self._link(downlink['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode,
                                 len(items[mode]) - cnt, log + search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
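A note on the filter handling in the example above: the provider builds a regex from image-name fragments in `may_filter` and then either keeps or skips rows whose marker image matches it, depending on whether the unmarked option ('f0') was selected. Below is a minimal, self-contained sketch of that decision; the `may_filter` contents and image paths are invented assumptions, only the keep/skip logic mirrors the snippet.

import re

# Hypothetical filter table, mirroring how the snippet indexes it:
# key -> (display name, enabled for matching, image-name fragment)
may_filter = {'f0': ('not marked', False, ''),
              'free': ('freeleech', True, 'free'),
              'double': ('double upload', True, 'doubleup')}
selected = ['f0']                    # user chose to keep only unmarked torrents
non_marked = 'f0' in selected        # True -> rows with a marker image are skipped

# Build the image regex from the unselected keys (exclude) or the selected ones (keep)
keys = ([f for f in may_filter if f in selected],
        [f for f in may_filter if f not in selected])[non_marked]
img_re = re.compile('(?i)(%s).png' % '|'.join(
    may_filter[f][2] for f in keys if may_filter[f][1]))

def skip_row(img_src):
    # Same test as the row loop: drop marked rows when keeping unmarked ones,
    # and drop unmarked rows when a specific marker was requested.
    marked = bool(img_src and img_re.search(img_src))
    return (non_marked and marked) or (not non_marked and not marked)

print(skip_row('pic/free.png'))      # True  - freeleech marker present, row skipped
print(skip_row(None))                # False - unmarked row kept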
Example no. 54
0
    def _doSearch(self, search_params, show=None):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                searchURL = self.urls['search'] % (search_string,
                                                   self.categories)

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                try:
                    html = BeautifulSoup(data,
                                         features=["html5lib", "permissive"])

                    torrent_table = html.find('table',
                                              attrs={'id': 'torrents-table'})
                    torrent_rows = torrent_table.find_all(
                        'tr') if torrent_table else []

                    #Continue only if at least one Release is found
                    if len(torrent_rows) < 2:
                        logger.log(
                            u"The Data returned from " + self.name +
                            " does not contain any torrent", logger.DEBUG)
                        continue

                    for result in torrent_table.find_all('tr')[1:]:

                        try:
                            link = result.find('td',
                                               attrs={
                                                   'class': 'ttr_name'
                                               }).find('a')
                            url = result.find('td', attrs={
                                'class': 'td_dl'
                            }).find('a')
                            title = link.string
                            download_url = self.urls['download'] % url['href']
                            id = int(link['href'].replace('details?id=', ''))
                            seeders = int(
                                result.find('td',
                                            attrs={
                                                'class': 'ttr_seeders'
                                            }).string)
                            leechers = int(
                                result.find('td',
                                            attrs={
                                                'class': 'ttr_leechers'
                                            }).string)
                        except (AttributeError, TypeError):
                            continue

                        if mode != 'RSS' and seeders == 0:
                            continue

                        if not title or not download_url:
                            continue

                        item = title, download_url, id, seeders, leechers
                        logger.log(
                            u"Found result: " + title + "(" + searchURL + ")",
                            logger.DEBUG)

                        items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]

        return results
Example no. 55
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download\.',
            'fl': '\[\W*F\W?L\W*\]'
        }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % (
                    search_string, self._categories_string(mode))

                html = self.get_url(search_url, timeout=90)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(
                            html) or 'width=750' not in html:
                        raise generic.HaltParseException

                    html = re.sub(r'</td>([^<]*)<tr>', r'</td></tr>\1<tr>',
                                  html)
                    with BS4Parser(html, 'html.parser',
                                   attr='width=750') as soup:
                        torrent_table = soup.find('table',
                                                  attrs={'width': 750})
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in 'seed', 'leech', 'size'
                                    ]
                                ]
                                if self.freeleech and not tr.attrs.get('bgcolor').endswith('FF99') or \
                                        self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))
Example no. 56
0
    def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        if self.proxy and self.proxy.isEnabled():
            self.headers.update({'referer': self.proxy.getProxyURL()})

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if mode != 'RSS':
                    searchURL = self.proxy._buildURL(self.searchurl % (urllib.quote(unidecode(search_string))))
                else:
                    searchURL = self.proxy._buildURL(self.url + 'tv/latest/')

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                data = self.getURL(searchURL)
                if not data:
                    continue

                re_title_url = self.proxy._buildRE(self.re_title_url)

                #Extracting torrent information from data returned by searchURL
                match = re.compile(re_title_url, re.DOTALL).finditer(urllib.unquote(data))
                for torrent in match:

                    title = torrent.group('title').replace('_',
                                                           '.')  #Not clear why, but SickBeard skips releases with '_' in the name
                    url = torrent.group('url')
                    id = int(torrent.group('id'))
                    seeders = int(torrent.group('seeders'))
                    leechers = int(torrent.group('leechers'))

                    #Filter unseeded torrents
                    if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                        continue

                    #Accept Torrent only from Good People for every Episode Search
                    if self.confirmed and re.search('(VIP|Trusted|Helper|Moderator)', torrent.group(0)) is None:
                        logger.log(u"ThePirateBay Provider found result " + torrent.group(
                            'title') + " but that doesn't seem like a trusted result so I'm ignoring it", logger.DEBUG)
                        continue

                    #Check that the number of video files matches the episodes in the season, and find the real quality of a full-season torrent by analyzing its files
                    if mode == 'Season' and search_mode == 'sponly':
                        ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
                        title = self._find_season_quality(title, id, ep_number)

                    if not title or not url:
                        continue

                    item = title, url, id, seeders, leechers

                    items[mode].append(item)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]

        return results
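The per-mode post-processing in the older providers above is simply a sort on the seeders field of each result tuple. A tiny worked example, with made-up result tuples, is shown below:

# Each result is a (title, url, id, seeders, leechers) tuple as assembled above;
# the values here are invented for illustration only.
items = [('Show.S01E01.720p.HDTV', 'magnet:?xt=a', 101, 4, 9),
         ('Show.S01E01.1080p.WEB', 'magnet:?xt=b', 102, 37, 2),
         ('Show.S01E01.HDTV.x264', 'magnet:?xt=c', 103, 12, 5)]

# Same rule as items[mode].sort(...): best-seeded results first.
items.sort(key=lambda tup: tup[3], reverse=True)
print([t[0] for t in items])
# ['Show.S01E01.1080p.WEB', 'Show.S01E01.HDTV.x264', 'Show.S01E01.720p.HDTV']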
Example no. 57
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'show_id': r'"show\?id=(\d+)[^>]+>([^<]+)<\/a>',
            'get': 'load_torrent'
        }.items())
        search_types = sorted([x for x in search_params.items()],
                              key=lambda tup: tup[0],
                              reverse=True)
        maybe_only = search_types[0][0]
        show_detail = '_only' in maybe_only and search_params.pop(
            maybe_only)[0] or ''
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                if 'Cache' == mode:
                    search_url = self.urls['browse']
                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results
                else:
                    search_string = isinstance(
                        search_string,
                        unicode) and unidecode(search_string) or search_string
                    search_string = search_string.replace(show_detail,
                                                          '').strip()
                    search_url = self.urls['search'] % search_string
                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results

                    shows = rc['show_id'].findall(html)
                    if any(shows):
                        html = ''
                        for show in set(shows):
                            sid, title = show
                            if title in unquote_plus(search_string):
                                html and time.sleep(1.1)
                                html += self.get_url(self.urls['show'] % sid)
                                if self.should_skip():
                                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html) as tbl:
                        tbl_rows = tbl.tbody.find_all(
                            'tr') or tbl.table.find_all('tr') or []

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[0:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                stats = cells[head['leech']].get_text().strip()
                                seeders, leechers = [
                                    (tryInt(x[0], 0), tryInt(x[1], 0))
                                    for x in re.findall(
                                        r'(?::(\d+))(?:\W*[/]\W*:(\d+))?',
                                        stats) if x[0]
                                ][0]
                                if self._reject_item(seeders, leechers):
                                    continue
                                sizes = [
                                    (tryInt(x[0], x[0]), tryInt(x[1], False))
                                    for x in re.findall(
                                        r'([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?',
                                        stats) if x[0]
                                ][0]
                                size = sizes[(0, 1)[1 < len(sizes)]]

                                for element in [
                                        x for x in cells[2].contents[::-1]
                                        if unicode(x).strip()
                                ]:
                                    if 'NavigableString' in str(
                                            element.__class__):
                                        title = unicode(element).strip()
                                        break

                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
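The seeder/leecher extraction in the example above pulls both counts out of a single stats cell with one regex. A minimal sketch, assuming the cell text looks something like ':12 / :3' (the real markup depends on the tracker page):

import re

stats = '350.2MB [3] :12 / :3'   # invented stats-cell text for illustration

# Same peer-count pattern as the snippet above.
peers_re = re.compile(r'(?::(\d+))(?:\W*[/]\W*:(\d+))?')
seeders, leechers = [(int(x[0]), int(x[1] or 0))
                     for x in peers_re.findall(stats) if x[0]][0]
print('seeders=%s leechers=%s' % (seeders, leechers))   # seeders=12 leechers=3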
Example no. 58
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict(
            (k, re.compile('(?i)' + v)) for (k, v) in {
                'get': 'info.php\?id',
                'valid_cat': 'cat=(?:0|50[12])',
                'filter': 'free',
                'title': r'Download\s*([^\s]+).*',
                'seeders': r'(^\d+)',
                'leechers': r'(\d+)$'
            }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string
                search_url = self.urls['search'] % search_string

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find('table', 'listor')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, {'seed': r'(?:see/lee|seed)'})
                                seeders, leechers = 2 * [
                                    cells[head['seed']].get_text().strip()
                                ]
                                seeders, leechers = [
                                    tryInt(n) for n in [
                                        rc['seeders'].findall(seeders)[0],
                                        rc['leechers'].findall(leechers)[0]
                                    ]
                                ]
                                if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['valid_cat']) \
                                        or (self.freeleech and not tr.find('img', src=rc['filter'])):
                                    continue

                                info = tr.find('a', href=rc['get'])
                                title = (rc['title'].sub(
                                    r'\1', info.attrs.get('title', ''))
                                         or info.get_text()).strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = self._link(info['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError, IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
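Several of the newer providers above address row cells by column name through `self._header_row(tr, ...)`. The helper itself is not shown in these examples; the stand-in below illustrates the idea under the assumption that it regex-matches the header-cell text and returns a name-to-index mapping (the names, patterns, and signature here are assumptions, not the provider's actual implementation):

import re

def header_row(header_cells, patterns=None):
    # Illustrative stand-in for self._header_row: map column names to indexes
    # by regex-matching the header text; the default patterns are assumptions.
    patterns = dict({'seed': r'seed', 'leech': r'leech', 'size': r'size'},
                    **(patterns or {}))
    found = {}
    for idx, text in enumerate(header_cells):
        for name, pattern in patterns.items():
            if name not in found and re.search('(?i)' + pattern, text):
                found[name] = idx
    return found

# Mirrors the call above: a custom pattern handles a combined "See/Lee" column.
head = header_row(['Category', 'Name', 'Size', 'Snatched', 'See/Lee'],
                  {'seed': r'(?:see/lee|seed)'})
print(head)   # {'size': 2, 'seed': 4}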
Example no. 59
0
    def _doSearch(self, search_params, epcount=0, age=0):

        results = []
        items = {'Season': [], 'Episode': [], 'RSS': []}

        if not self._doLogin():
            return []

        for mode in search_params.keys():
            for search_string in search_params[mode]:

                if isinstance(search_string, unicode):
                    search_string = unidecode(search_string)

                nonsceneSearchURL = None
                foreignSearchURL = None
                if mode == 'Season':
                    searchURL = self.urls['archive'] % (search_string)
                    data = [self.getURL(searchURL, headers=self.headers)]
                else:
                    searchURL = self.urls['search'] % (search_string,
                                                       self.categories)
                    nonsceneSearchURL = self.urls['nonscene'] % (search_string)
                    foreignSearchURL = self.urls['foreign'] % (search_string)
                    data = [
                        self.getURL(searchURL, headers=self.headers),
                        self.getURL(nonsceneSearchURL, headers=self.headers),
                        self.getURL(foreignSearchURL, headers=self.headers)
                    ]
                    logger.log(u"Search string: " + nonsceneSearchURL,
                               logger.DEBUG)
                    logger.log(u"Search string: " + foreignSearchURL,
                               logger.DEBUG)

                logger.log(u"Search string: " + searchURL, logger.DEBUG)

                if not data:
                    continue

                try:
                    for dataItem in data:
                        html = BeautifulSoup(
                            dataItem, features=["html5lib", "permissive"])

                        torrent_table = html.find(
                            'table', attrs={'id': 'torrents-table'})
                        torrent_rows = torrent_table.find_all(
                            'tr') if torrent_table else []

                        #Continue only if at least one Release is found
                        if len(torrent_rows) < 2:
                            if html.title:
                                source = self.name + " (" + html.title.string + ")"
                            else:
                                source = self.name
                            logger.log(
                                u"The Data returned from " + source +
                                " does not contain any torrent", logger.DEBUG)
                            continue

                        for result in torrent_table.find_all('tr')[1:]:

                            try:
                                link = result.find('td',
                                                   attrs={
                                                       'class': 'ttr_name'
                                                   }).find('a')
                                all_urls = result.find('td',
                                                       attrs={
                                                           'class': 'td_dl'
                                                       }).find_all('a',
                                                                   limit=2)
                                # The Foreign section contains two links, the other sections just one
                                if self._isSection('Foreign', dataItem):
                                    url = all_urls[1]
                                else:
                                    url = all_urls[0]
                                title = link.string
                                if re.search('\.\.\.', title):
                                    details_html = BeautifulSoup(
                                        self.getURL(self.url + "/" +
                                                    link['href']))
                                    title = re.search(
                                        '(?<=").+(?<!")',
                                        details_html.title.string).group(0)
                                download_url = self.urls['download'] % url[
                                    'href']
                                id = int(link['href'].replace(
                                    'details?id=', ''))
                                seeders = int(
                                    result.find('td',
                                                attrs={
                                                    'class': 'ttr_seeders'
                                                }).string)
                                leechers = int(
                                    result.find('td',
                                                attrs={
                                                    'class': 'ttr_leechers'
                                                }).string)
                            except (AttributeError, TypeError):
                                continue

                            if mode != 'RSS' and (seeders == 0
                                                  or seeders < self.minseed
                                                  or leechers < self.minleech):
                                continue

                            if not title or not download_url:
                                continue

                            item = title, download_url, id, seeders, leechers

                            if self._isSection('Non-Scene', dataItem):
                                logger.log(
                                    u"Found result: " + title + "(" +
                                    nonsceneSearchURL + ")", logger.DEBUG)
                            elif self._isSection('Foreign', dataItem):
                                logger.log(
                                    u"Found result: " + title + "(" +
                                    foreignSearchURL + ")", logger.DEBUG)
                            else:
                                logger.log(
                                    u"Found result: " + title + "(" +
                                    searchURL + ")", logger.DEBUG)

                            items[mode].append(item)

                except Exception, e:
                    logger.log(
                        u"Failed parsing " + self.name + " Traceback: " +
                        traceback.format_exc(), logger.ERROR)

            #For each search mode sort all the items by seeders
            items[mode].sort(key=lambda tup: tup[3], reverse=True)

            results += items[mode]

        return results
Example no. 60
0
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
            'info': 'detail',
            'get': 'download',
            'fl': 'free'
        }.items())
        for mode in search_params.keys():
            for search_string in search_params[mode]:
                search_string = isinstance(
                    search_string,
                    unicode) and unidecode(search_string) or search_string

                if 'Cache' != mode:
                    kwargs = dict(
                        post_data={
                            'keywords': search_string,
                            'do': 'quick_sort',
                            'page': '0',
                            'category': '0',
                            'search_type': 't_name',
                            'sort': 'added',
                            'order': 'desc',
                            'daysprune': '-1'
                        })

                html = self.get_url(self.urls['search'], **kwargs)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, 'html.parser') as soup:
                        torrent_table = soup.find('table', id='sortabletable')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')
                        get_detail = True

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        for tr in torrent_rows[1:]:
                            try:
                                seeders, leechers, size = [
                                    tryInt(n, n) for n in [
                                        tr.find_all('td')
                                        [x].get_text().strip()
                                        for x in (-3, -2, -5)
                                    ]
                                ]
                                if self._peers_fail(mode, seeders, leechers) \
                                        or self.freeleech and None is tr.find_all('td')[1].find('img', title=rc['fl']):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (tr.find('div',
                                                 attrs={
                                                     'class': 'tooltip-content'
                                                 }).get_text()
                                         or info.get_text()).strip()
                                title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                                download_url = self.urls['get'] % str(
                                    tr.find('a', href=rc['get'])['href']
                                ).lstrip('/').replace(
                                    self.urls['config_provider_home_uri'], '')
                            except Exception:
                                continue

                            if get_detail and title.endswith('...'):
                                try:
                                    with BS4Parser(
                                            self.get_url('%s%s' % (
                                                self.urls[
                                                    'config_provider_home_uri'],
                                                info['href'].lstrip('/').
                                                replace(
                                                    self.urls[
                                                        'config_provider_home_uri'],
                                                    ''))),
                                            'html.parser') as soup_detail:
                                        title = soup_detail.find(
                                            'td',
                                            attrs={
                                                'colspan': '3',
                                                'class': 'thead'
                                            }).get_text().strip()
                                        title = re.findall(
                                            '(?m)(^[^\r\n]+)', title)[0]
                                except IndexError:
                                    continue
                                except Exception:
                                    get_detail = False

                            try:
                                has_series = re.findall(
                                    '(?i)(.*?series[^\d]*?\d+)(.*)', title)
                                if has_series:
                                    rc_xtras = re.compile(
                                        '(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)'
                                    )
                                    has_special = rc_xtras.findall(
                                        has_series[0][1])
                                    if has_special:
                                        title = has_series[0][0] + rc_xtras.sub(
                                            list(
                                                set(
                                                    list(has_special[0][0]) +
                                                    list(has_special[0][2])))
                                            [0], has_series[0][1])
                                    title = re.sub('(?i)series', r'Season',
                                                   title)

                                title_parts = re.findall(
                                    '(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$',
                                    title)
                                if len(title_parts):
                                    new_parts = [
                                        tryInt(part, part.strip())
                                        for part in title_parts[0]
                                    ]
                                    if not new_parts[1]:
                                        new_parts[1] = 1
                                    new_parts[2] = ('E%02d', ' Pack %d')[
                                        mode in 'Season'] % new_parts[2]
                                    title = '%s.S%02d%s.%s' % tuple(new_parts)

                                dated = re.findall(
                                    '(?i)([\(\s]*)((?:\d\d\s)?[adfjmnos]\w{2,}\s+(?:19|20)\d\d)([\)\s]*)',
                                    title)
                                if dated:
                                    title = title.replace(
                                        ''.join(dated[0]), '%s%s%s' %
                                        (('', ' ')[1 < len(dated[0][0])],
                                         parse(
                                             dated[0][1]).strftime('%Y-%m-%d'),
                                         ('', ' ')[1 < len(dated[0][2])]))
                                    add_pad = re.findall(
                                        '((?:19|20)\d\d\-\d\d\-\d\d)([\w\W])',
                                        title)
                                    if len(add_pad) and add_pad[0][1] not in [
                                            ' ', '.'
                                    ]:
                                        title = title.replace(
                                            ''.join(add_pad[0]), '%s %s' %
                                            (add_pad[0][0], add_pad[0][1]))

                                if title and download_url:
                                    items[mode].append(
                                        (title, download_url, seeders,
                                         self._bytesizer(size)))
                            except Exception:
                                pass

                except generic.HaltParseException:
                    pass
                except Exception:
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 ('search string: ' +
                                  search_string.replace('%', ' '),
                                  self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            self._sort_seeders(mode, items)

            results = list(set(results + items[mode]))

        return results
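The date clean-up near the end of the last example rewrites a spelled-out date in a release title into ISO form so the usual name parsers can pick it up. A worked example on a made-up title, using the same regex and the `dateutil` parse call already used elsewhere in these snippets:

import re
from dateutil.parser import parse

title = 'Show Name (21 June 2015) 720p HDTV'   # invented title for illustration

dated = re.findall(
    r'(?i)([\(\s]*)((?:\d\d\s)?[adfjmnos]\w{2,}\s+(?:19|20)\d\d)([\)\s]*)', title)
if dated:
    # Replace " (21 June 2015) " with " 2015-06-21 ", keeping single spacing.
    title = title.replace(''.join(dated[0]), '%s%s%s' % (
        ('', ' ')[1 < len(dated[0][0])],
        parse(dated[0][1]).strftime('%Y-%m-%d'),
        ('', ' ')[1 < len(dated[0][2])]))
print(title)   # Show Name 2015-06-21 720p HDTV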