Example #1
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict with a
            'total_results' counter and a 'results' list)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        # Drop the result counter defensively: a bare ``del`` raises KeyError
        # outside the per-row try block when the key is missing, and a missing
        # 'results' key would likewise abort the whole parse.
        data.pop('total_results', None)
        torrent_rows = data.get('results', [])

        for row in torrent_rows:
            try:
                title = row.get('release_name')
                download_url = row.get('download_url')
                if not all([title, download_url]):
                    continue

                seeders = row.get('seeders')
                leechers = row.get('leechers')

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                freeleech = row.get('freeleech')
                if self.freeleech and not freeleech:
                    continue

                # The API reports the size as a bare number of megabytes.
                torrent_size = '{0} MB'.format(row.get('size', -1))
                size = convert_size(torrent_size) or -1

                pubdate_raw = row.get('publish_date')
                pubdate = self.parse_pubdate(pubdate_raw, timezone='Europe/Copenhagen')

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #2
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an RSS/XML document)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            for row in html('item'):
                try:
                    # Skip anything the feed does not label as video content.
                    category = row.category
                    if category and 'video' not in category.get_text(strip=True).lower():
                        continue

                    raw_title = row.title.text
                    if raw_title:
                        # Add "-" after codec and add missing "."
                        title = re.sub(r'([xh][ .]?264|xvid)( )', r'\1-', raw_title).replace(' ', '.')
                    else:
                        title = ''

                    # The info hash is the last path segment of the guid URL.
                    info_hash = row.guid.text.rsplit('/', 1)[-1]
                    download_url = 'magnet:?xt=urn:btih:' + info_hash + '&dn=' + title + self._custom_trackers
                    if not all([title, download_url]):
                        continue

                    torrent_size, seeders, leechers = self._split_description(row.find('description').text)
                    size = convert_size(torrent_size) or -1

                    pubdate = self.parse_pubdate(row.pubdate.get_text())

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #3
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        results = data.get('torrent_results', {})
        if not results:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for result in results:
            try:
                title = result.pop('title')
                download_url = result.pop('download') + self._custom_trackers
                if not all([title, download_url]):
                    continue

                seeders = result.pop('seeders', 0)
                leechers = result.pop('leechers', 0)

                # Skip torrents below the configured seeder threshold
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(result.pop('size', None), default=-1)

                pubdate = self.parse_pubdate(result.pop('pubdate', None))

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #4
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict with a
            'torrentList' and optionally a 'userTimeZone')
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        rows = data['torrentList']
        tz = data.get('userTimeZone', 'UTC')

        # Continue only if at least one release is found
        if not rows:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for row in rows:
            try:
                title = row['name']
                download_url = self.urls['download'].format(id=row['fid'], file=row['filename'])

                seeders = int(row['seeders'])
                leechers = int(row['leechers'])

                # Skip torrents below the configured seeder threshold
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(row['size']) or -1

                # Timestamps are expressed in the user's configured timezone.
                pubdate = self.parse_pubdate(row['addedTimestamp'], timezone=tz)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #5
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []
        rows = data.get('data', {}).get('torrents', [])

        for row in rows:
            try:
                title = row.pop('name', '')
                # Build the download link with the torrent id and our passkey.
                query = urlencode({'id': row.pop('id', ''), 'passkey': self.passkey})
                download_url = self.urls['download'] + '?' + query

                if not all([title, download_url]):
                    continue

                seeders = try_int(row.pop('seeders', 0))
                leechers = try_int(row.pop('leechers', 0))

                # Skip torrents below the configured seeder threshold
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(row.pop('size', -1), -1)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #6
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []
        torrent_rows = data.pop('torrents', {})

        if not torrent_rows:
            log.debug('Provider has no results for this search')
            return items

        for row in torrent_rows:
            try:
                title = row.get('name')
                download_url = row.get('download_link')
                if not all([title, download_url]):
                    continue

                seeders = row.get('seeders')
                leechers = row.get('leechers')

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(row.get('size'), default=-1)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    # No publish date in this provider's response; include the
                    # key anyway for consistency with the other parsers.
                    'pubdate': None,
                    'hash': '',
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #7
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an iterable of feed entries)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        # Size units as reported by this feed
        units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

        items = []

        # Use a distinct loop-variable name so the result dict below does not
        # shadow the feed entry.
        for entry in data:
            try:
                title = entry['title']
                download_url = entry['link']
                if not all([title, download_url]):
                    continue

                seeders = try_int(entry['nyaa_seeders'])
                leechers = try_int(entry['nyaa_leechers'])

                # Skip torrents below the configured seeder threshold
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(entry['nyaa_size'], default=-1, units=units)
                pubdate = self.parse_pubdate(entry['published'])

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #8
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (a JSON dict)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        for torrent in data.pop('torrents', {}):
            try:
                title = torrent.pop('title', '')
                info_hash = torrent.pop('infoHash', '')
                # Build a bare magnet link from the info hash.
                download_url = 'magnet:?xt=urn:btih:{0}'.format(info_hash)
                if not all([title, download_url, info_hash]):
                    continue

                peers = torrent.pop('swarm', {})
                seeders = try_int(peers.pop('seeders', 0))
                leechers = try_int(peers.pop('leechers', 0))

                # Skip torrents below the configured seeder threshold
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(torrent.pop('size', -1)) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #9
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an HTML page)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrents = html('tr')

            # Need at least a header row plus one data row.
            if not torrents or len(torrents) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Skip column headers
            for row in torrents[1:]:
                # Skip extraneous rows at the end
                if len(row.contents) < 10:
                    continue

                try:
                    title = row.find(class_='torrent-filename').get_text(strip=True)
                    download_url = row.find(class_='torrent-download-icon').get('href')
                    # NOTE(review): seeders/leechers are kept as strings and no
                    # minimum-seeders filter is applied here, unlike the sibling
                    # parsers — confirm downstream consumers accept string counts.
                    seeders = row.contents[13].get_text()
                    leechers = row.contents[15].get_text()
                    # Fixed offsets into row.contents — assumes the provider's
                    # exact table layout; a markup change raises IndexError,
                    # which is swallowed by the handler below.
                    size = convert_size(row.contents[11].get_text(strip=True), default=-1)
                    pubdate = self.parse_pubdate(row.contents[7].contents[1].get('title'))

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    # Logged unconditionally (no mode != 'RSS' guard here).
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #10
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an RSS/XML document)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            # Use a distinct name per feed entry so the result dict does not
            # shadow it.
            for entry in html('item'):
                try:
                    title = entry.title.get_text(strip=True)
                    download_url = entry.enclosure.get('url').strip()
                    if not all([title, download_url]):
                        continue

                    # Size comes from the enclosure's 'length' attribute.
                    size = convert_size(entry.enclosure.get('length'), default=-1)

                    pubdate = self.parse_pubdate(entry.pubdate.get_text(strip=True))

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'pubdate': pubdate,
                    }

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #11
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an HTML page)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            # Bail out early when the site explicitly reports no matches.
            if html.find('h2', text='No .torrents fit this filter criteria'):
                log.debug('Data returned from provider does not contain any torrents')
                return items

            table = html.find('table', attrs={'style': 'border: none; width: 100%;'})
            rows = table('tr', class_='browse') if table else []

            for row in rows:
                cols = row('td')

                try:
                    title = cols[1].find('a').get('title')
                    torrent_url = cols[2].find('a').get('href')
                    download_url = urljoin(self.url, torrent_url)
                    if not all([title, torrent_url]):
                        continue

                    seeders = try_int(cols[9].get_text(), 1)
                    leechers = try_int(cols[10].get_text())

                    # Skip torrents below the configured seeder threshold
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(self._norm_size(cols[7].get_text(strip=True))) or -1

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #12
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an iterable of result dicts)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        for row in data:

            try:
                # Check if this is a freeleech torrent and if we've configured to only allow freeleech.
                if self.freeleech and row.get('download-multiplier') != 0:
                    continue

                # Strip BBCode-style tags (e.g. [b=...]...[/b]) from the name.
                title = re.sub(r'\[.*\=.*\].*\[/.*\]', '', row['name']) if row['name'] else None
                download_url = urljoin(self.urls['download'], '{0}/{1}.torrent'.format(
                    row['t'], row['name']
                )) if row['t'] and row['name'] else None

                if not all([title, download_url]):
                    continue

                seeders = int(row['seeders'])
                leechers = int(row['leechers'])

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = row['size']
                size = convert_size(torrent_size) or -1

                # 'ctime' is a unix timestamp.
                pubdate_raw = row['ctime']
                pubdate = self.parse_pubdate(pubdate_raw, fromtimestamp=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                # Log each result once (a duplicate of this message used to be
                # emitted before the pubdate parsing as well).
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #13
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an HTML page)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        def get_label_title(label):
            """Get table row header labels.

            Prefer the cell's own text, then its anchor text, then the
            ``title`` attribute of a contained image; returns None otherwise.
            """
            if label.get_text():
                return label.get_text(strip=True)
            if label.a and label.a.get_text(strip=True):
                return label.a.get_text(strip=True)
            if label.img:
                return label.img.get('title')

        items = []
        # Cheap substring check before paying the cost of HTML parsing.
        if '<h2>Nothing found!</h2>' in data:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        with BS4Parser(data, 'html.parser') as html:
            torrent_table = html.find('table', width='100%')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 1:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Cat., Active, Name, Download, Added, Size, Uploader, Seeders, Leechers
            labels = [get_label_title(label) for label in
                      torrent_rows[0]('td')]

            for row in torrent_rows[1:]:
                try:
                    # Look cells up by header label rather than fixed index, so
                    # column reordering on the site does not break the parse.
                    cells = row.findChildren('td')[:len(labels)]
                    if len(cells) < len(labels):
                        continue

                    title = cells[labels.index('Name')].a
                    title = title.get_text(strip=True) if title else None
                    link = cells[labels.index('Download')].a
                    link = link.get('href') if link else None
                    download_url = urljoin(self.url, link) if link else None
                    if not all([title, download_url]):
                        continue

                    seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                    leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    # Assumes the Size cell holds exactly three child nodes
                    # (value, separator, unit); a different layout raises
                    # ValueError, which the handler below swallows.
                    torrent_size, _, unit = cells[labels.index('Size')].contents
                    size = convert_size('{0} {1}'.format(torrent_size, unit)) or -1

                    pubdate_raw = cells[labels.index('Added')].get_text()
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #14
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search (an HTML page)
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as soup:
            torrent_table = soup.find('table', class_='listing')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Each release spans two consecutive rows (desc-top / desc-bot);
            # start at row 1 when the first row is a single-cell header.
            a = 1 if len(torrent_rows[0]('td')) < 2 else 0

            # Skip column headers
            for top, bot in zip(torrent_rows[a::2], torrent_rows[a + 1::2]):
                try:
                    desc_top = top.find('td', class_='desc-top')
                    title = desc_top.get_text(strip=True) if desc_top else None
                    download_url = desc_top.find('a')['href'] if desc_top else None
                    if not all([title, download_url]):
                        continue

                    stats = bot.find('td', class_='stats').get_text(strip=True)
                    sl = re.match(r'S:(?P<seeders>\d+)L:(?P<leechers>\d+)C:(?:\d+)ID:(?:\d+)', stats.replace(' ', ''))
                    seeders = try_int(sl.group('seeders')) if sl else 0
                    leechers = try_int(sl.group('leechers')) if sl else 0

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    desc_bottom = bot.find('td', class_='desc-bot').get_text(strip=True)
                    size_field = desc_bottom.split('|')[1].strip()
                    # BUG FIX: ``.strip('Size: ')`` treats its argument as a
                    # character *set* ('S', 'i', 'z', 'e', ':', ' ') and can eat
                    # leading/trailing size characters; remove the literal label
                    # prefix instead.
                    if size_field.startswith('Size:'):
                        size_field = size_field[len('Size:'):].strip()
                    size = convert_size(size_field) or -1

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #15
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            panels = html('div', class_='panel-body', limit=2)
            # RSS results live in the first panel, regular searches in the second.
            panel_index = 0 if mode == 'RSS' else 1
            rows = panels[panel_index]('tr') if panels else []

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # First row holds the column headers; skip it
            for result in rows[1:]:
                columns = result('td')

                try:
                    title = columns[1].find('a').get_text()
                    magnet = columns[2].find('a', title='Magnet link')['href']
                    download_url = '{magnet}{trackers}'.format(magnet=magnet,
                                                               trackers=self._custom_trackers)
                    if not all([title, download_url]):
                        continue

                    # Assume one seeder / no leechers when peer info is missing
                    seeders, leechers = 1, 0
                    if len(columns) > 5:
                        peers_div = columns[5].find('div')
                        peers_title = peers_div.get('title') if peers_div else None
                        if peers_title:
                            parts = peers_title.replace(',', '').split(' | ', 1)
                            # Removes 'Seeders: '
                            seeders = try_int(parts[0][9:])
                            # Removes 'Leechers: '
                            leechers = try_int(parts[1][10:])

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(columns[3].get_text().replace(',', '')) or -1

                    pubdate_raw = columns[4].get_text().replace('yesterday', '24 hours')
                    # "long ago" can't be translated to a date
                    if pubdate_raw == 'long ago':
                        pubdate_raw = None
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #16
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html.parser') as html:
            torrent_table = html.find('div', class_='browse')
            torrent_rows = torrent_table(
                'div', class_=re.compile('^line')) if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 1:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            for row in torrent_rows:
                try:
                    # Titles look like 'hebrew/english' or 'hebrew\english';
                    # keep only the part after the separator.
                    heb_eng_title = row.find('div', class_='bTitle').find(
                        href=re.compile(r'details\.php')).find('b').get_text()
                    if '/' in heb_eng_title:
                        title = heb_eng_title.split('/')[1].strip()
                    elif '\\' in heb_eng_title:
                        title = heb_eng_title.split('\\')[1].strip()
                    else:
                        continue

                    download_id = row.find('div', class_='bTitle').find(
                        href=re.compile(r'download\.php'))['href']

                    if not all([title, download_id]):
                        continue

                    download_url = urljoin(self.url, download_id)

                    seeders = try_int(
                        row.find('div', class_='bUping').get_text(strip=True))
                    leechers = try_int(
                        row.find('div',
                                 class_='bDowning').get_text(strip=True))

                    # Filter unseeded torrent. Compare against the configured
                    # minimum directly; the previous min(self.minseed, 1)
                    # capped the threshold at 1 and disabled the setting.
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                ' minimum seeders: {0}. Seeders: {1}', title,
                                seeders)
                        continue

                    # [5:] strips the leading 'Size:' label from the cell text
                    torrent_size = row.find(
                        'div', class_='bSize').get_text(strip=True)
                    size = convert_size(torrent_size[5:], sep='') or -1

                    pubdate_raw = row.find('div', class_=re.compile(
                        'bHow')).find_all('span')[1].next_sibling.strip()
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #17
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        def process_column_header(td):
            # Prefer the header image title, then the cell text,
            # then the anchor's own title attribute.
            result = ''
            if td.a and td.a.img:
                result = td.a.img.get('title', td.a.get_text(strip=True))
            if not result:
                result = td.get_text(strip=True)
            if not result and td.a and td.a.get('title'):
                result = td.a['title']
            return result

        items = []

        with BS4Parser(data, 'html5lib') as html:
            table = html.find('table', class_='torrent_table')
            rows = table('tr') if table else []

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            # Need to only search one level deep for 'td' tags, as one of the td's also has a td.
            labels = [
                process_column_header(header)
                for header in rows[0].find_all('td', recursive=False)
            ]

            # First row is the header row; skip it
            for row in rows[1:]:
                cells = row.find_all('td', recursive=False)
                if len(cells) < len(labels):
                    continue

                try:
                    # Skip if torrent has been nuked due to poor quality
                    if row.find('img', alt='Nuked'):
                        continue

                    name_cell = cells[labels.index('Name')]
                    title = name_cell.find(
                        'a', class_='overlay_torrent').get_text(strip=True)
                    download_url = urljoin(self.url, name_cell.find('a')['href'])
                    if not all([title, download_url]):
                        continue

                    # Drop thousands separators before converting to int
                    seeders = int(cells[labels.index('Seeders')]
                                  .get_text(strip=True).replace(',', ''))
                    leechers = int(cells[labels.index('Leechers')]
                                   .get_text(strip=True).replace(',', ''))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                ' minimum seeders: {0}. Seeders: {1}', title,
                                seeders)
                        continue

                    units = ['B', 'KIB', 'MIB', 'GIB', 'TB', 'PB']
                    size = convert_size(
                        cells[labels.index('Size')].get_text(strip=True),
                        units=units) or -1

                    pubdate_raw = cells[3].find('span')['title']
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #18
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        def process_column_header(td):
            # Use the anchor's title when present, otherwise the cell text.
            result = ''
            if td.a:
                result = td.a.get('title')
            if not result:
                result = td.get_text(strip=True)
            return result

        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', id='torrenttable')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            labels = [
                process_column_header(label) for label in torrent_rows[0]('th')
            ]

            # Skip column headers
            for row in torrent_rows[1:]:
                cells = row('td')

                try:
                    name = cells[labels.index('Name')]
                    title = name.find('a').get_text(strip=True)
                    download_url = row.find('td',
                                            class_='quickdownload').find('a')
                    if not all([title, download_url]):
                        continue

                    download_url = urljoin(self.url, download_url['href'])

                    seeders = int(
                        cells[labels.index('Seeders')].get_text(strip=True))
                    leechers = int(
                        cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent. Compare against the configured
                    # minimum directly; the previous min(self.minseed, 1)
                    # capped the threshold at 1 and disabled the setting.
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                " minimum seeders: {0}. Seeders: {1}", title,
                                seeders)
                        continue

                    torrent_size = cells[labels.index('Size')].get_text()
                    size = convert_size(torrent_size) or -1

                    # The publish date is the trailing 19 characters of the
                    # name cell text -- presumably 'YYYY-MM-DD hh:mm:ss';
                    # TODO confirm against the site layout.
                    pubdate_raw = name.get_text(strip=True)[-19:]
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #19
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        def process_column_header(th):
            return th.span.get_text() if th.span else th.get_text()

        with BS4Parser(data, 'html5lib') as html:
            table = html.find('table', class_='table2')

            if not table:
                log.debug('Data returned from provider does not contain any {0}torrents',
                          'confirmed ' if self.confirmed else '')
                return items

            rows = table.find_all('tr')
            labels = [process_column_header(header) for header in rows[0].find_all('th')]

            # Skip the first row, since it isn't a valid result
            for row in rows[1:]:
                cells = row.find_all('td')

                try:
                    name_cell = cells[labels.index('Torrent Name')]

                    # Optionally keep only releases flagged as verified
                    if self.confirmed and not name_cell.find('img', title='Verified torrent'):
                        continue

                    anchors = name_cell.find_all('a')
                    if not anchors or len(anchors) < 2:
                        continue

                    title_url = anchors[0].get('href')
                    title = anchors[1].get_text(strip=True)
                    regex_result = id_regex.search(anchors[1].get('href'))

                    # Prefer the longer title extracted from the details URL
                    alt_title = regex_result.group(1)
                    if len(title) < len(alt_title):
                        title = alt_title.replace('-', ' ')

                    info_hash = hash_regex.search(title_url).group(2)
                    if not all([title, info_hash]):
                        continue

                    download_url = 'magnet:?xt=urn:btih:{hash}&dn={title}{trackers}'.format(
                        hash=info_hash, title=title, trackers=self._custom_trackers)

                    # Remove comma as thousands separator from larger number like 2,000 seeders = 2000
                    seeders = try_int(cells[labels.index('Seed')].get_text(strip=True).replace(',', ''))
                    leechers = try_int(cells[labels.index('Leech')].get_text(strip=True).replace(',', ''))

                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(cells[labels.index('Size')].get_text(strip=True)) or -1

                    pubdate_raw = cells[1].get_text().replace('Last', '1').replace('Yesterday', '24 hours')
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #20
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS.

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            table = html.find(class_='ttable_headinner')
            rows = table('tr') if table else []

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Catégorie, Release, Date, DL, Size, C, S, L
            labels = [header.get_text(strip=True) for header in rows[0]('th')]

            for torrent in rows[1:]:
                cells = torrent('td')
                if len(cells) < len(labels):
                    continue

                try:
                    name_cell = cells[labels.index('Nom')]
                    torrent_id = re.search('id=([0-9]+)',
                                           name_cell.find('a')['href']).group(1)
                    title = name_cell.get_text(strip=True)
                    if not all([title, torrent_id]):
                        continue

                    download_url = self.urls['download'].format(torrent_id)

                    seeders = int(cells[labels.index('S')].get_text(strip=True))
                    leechers = int(cells[labels.index('L')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(cells[labels.index('Taille')].get_text(),
                                        default=-1)

                    # Publish date is embedded in an onmouseover tooltip
                    date_raw = torrent('a')[2]['onmouseover']
                    pubdate_raw = re.search(r'Poster le: <\/b>(\d{2}-\d{2}-\d{4})', date_raw)
                    pubdate = self.parse_pubdate(pubdate_raw.group(1), dayfirst=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate
                    }

                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #21
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', {'id': 'torrent_table'})

            # Continue only if at least one release is found
            if not torrent_table:
                log.debug(
                    'Data returned from provider does not contain any {0}torrents',
                    'ranked ' if self.ranked else '')
                return items

            torrent_body = torrent_table.find('tbody')
            torrent_rows = torrent_body.contents
            # Drop every other child node (presumably whitespace/filler
            # between row tags -- TODO confirm against the site markup)
            del torrent_rows[1::2]

            for row in torrent_rows[1:]:
                try:
                    torrent = row('td')
                    if len(torrent) <= 1:
                        break

                    all_as = (torrent[1])('a')
                    notinternal = row.find(
                        'img', src='/static//common/user_upload.png')
                    if self.ranked and notinternal:
                        log.debug(
                            'Found a user uploaded release, Ignoring it..')
                        continue

                    freeleech = row.find(
                        'img', src='/static//common/browse/freeleech.png')
                    if self.freeleech and not freeleech:
                        continue

                    title = all_as[2].string
                    download_url = urljoin(self.url, all_as[0].attrs['href'])
                    if not all([title, download_url]):
                        continue

                    seeders = try_int((row('td')[6]).text.replace(',', ''))
                    leechers = try_int((row('td')[7]).text.replace(',', ''))

                    # Filter unseeded torrent. Compare against the configured
                    # minimum directly; the previous min(self.minseed, 1)
                    # capped the threshold at 1 and disabled the setting.
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                " minimum seeders: {0}. Seeders: {1}", title,
                                seeders)
                        continue

                    torrent_size = row.find(
                        'td', class_='nobr').find_next_sibling('td').string
                    # Always assign 'size': previously it stayed unbound when
                    # torrent_size was falsy, raising an uncaught
                    # UnboundLocalError when building the item below.
                    size = convert_size(torrent_size) or -1 if torrent_size else -1

                    pubdate_raw = row.find('td',
                                           class_='nobr').find('span')['title']
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    # log.exception logs the traceback; matches the error
                    # handling style of the other parsers in this file.
                    log.exception('Failed parsing provider.')

        return items
Example #22
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        # JSON response: results arrive as a list of dicts
        results = data.get('torrent_results', {})
        if not results:
            log.debug(
                'Data returned from provider does not contain any torrents')
            return items

        for result in results:
            try:
                title = result.pop('title')
                download_url = result.pop('download') + self._custom_trackers
                if not all([title, download_url]):
                    continue

                seeders = result.pop('seeders', 0)
                leechers = result.pop('leechers', 0)

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}', title,
                            seeders)
                    continue

                size = convert_size(result.pop('size', None), default=-1)

                pubdate = self.parse_pubdate(result.pop('pubdate', None))

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError,
                    IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #23
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            table = html.find('table', id='torrents')
            rows = table('tr') if table else []

            # Continue only if at least one release is found
            if len(rows) < 2 or html.find(text='No Torrents Found!'):
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Skip column headers
            for torrent in rows[1:]:
                try:
                    cells = torrent('td')
                    title = cells[1].find('a').text
                    download_url = self.urls['base_url'] + cells[3].find('a')['href']
                    if not all([title, download_url]):
                        continue

                    seeders = int(torrent.find('td', attrs={'class': 'ac t_seeders'}).text)
                    leechers = int(torrent.find('td', attrs={'class': 'ac t_leechers'}).text)

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(cells[5].text) or -1

                    # Relative age is the last '|'-separated chunk of the info div
                    pubdate_raw = cells[1].find('div').get_text().split('|')[-1].strip()
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #24
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS.

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find(class_='ttable_headinner')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Catégorie, Release, Date, DL, Size, C, S, L
            labels = []
            for header in torrent_rows[0]('th'):
                labels.append(header.get_text(strip=True))

            for torrent in torrent_rows[1:]:
                cells = torrent('td')
                if len(cells) < len(labels):
                    continue

                try:
                    name_cell = cells[labels.index('Nom')]
                    id_match = re.search('id=([0-9]+)', name_cell.find('a')['href'])
                    torrent_id = id_match.group(1)
                    title = name_cell.get_text(strip=True)
                    if not all([title, torrent_id]):
                        continue

                    download_url = self.urls['download'].format(torrent_id)

                    seeders = int(cells[labels.index('S')].get_text(strip=True))
                    leechers = int(cells[labels.index('L')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(cells[labels.index('Taille')].get_text(),
                                        default=-1)

                    # Publish date lives in the onmouseover tooltip of the third anchor
                    date_raw = torrent('a')[2]['onmouseover']
                    pubdate_raw = re.search(r'Poster le: <\/b>(\d{2}-\d{2}-\d{4})', date_raw)
                    pubdate = self.parse_pubdate(pubdate_raw.group(1), dayfirst=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate
                    }

                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #25
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            panels = html('div', class_='panel-body', limit=2)
            # RSS listings live in the first panel, regular searches in the second.
            panel_index = 0 if mode == 'RSS' else 1
            torrent_rows = panels[panel_index]('tr') if panels else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            # First row holds the column headers; skip it.
            for result_row in torrent_rows[1:]:
                columns = result_row('td')

                try:
                    title = columns[1].find('a').get_text()
                    magnet = columns[2].find('a', title='Magnet link')['href']
                    download_url = '{magnet}{trackers}'.format(
                        magnet=magnet, trackers=self._custom_trackers)
                    if not all([title, download_url]):
                        continue

                    # Defaults used when no peers tooltip is available.
                    seeders, leechers = 1, 0
                    if len(columns) > 5:
                        peers_div = columns[5].find('div')
                        if peers_div and peers_div.get('title'):
                            counts = peers_div['title'].replace(',', '').split(' | ', 1)
                            # Removes 'Seeders: '
                            seeders = try_int(counts[0][9:])
                            # Removes 'Leechers: '
                            leechers = try_int(counts[1][10:])

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                ' minimum seeders: {0}. Seeders: {1}', title,
                                seeders)
                        continue

                    raw_size = columns[3].get_text().replace(',', '')
                    size = convert_size(raw_size) or -1

                    pubdate_raw = columns[4].get_text().replace('yesterday', '24 hours')
                    # "long ago" can't be translated to a date
                    if pubdate_raw == 'long ago':
                        pubdate_raw = None
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #26
0
    def parse(self, data, mode, show=None):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS
        :param show: Optional show object, used to detect season name
            exceptions when building release names.

        :return: A list of items found
        """
        def is_season_exception(series_name):
            """Try to detect by series name, if this is a season exception."""
            if not show:
                return

            return get_season_from_name(show, series_name)

        items = []

        group_rows = data.get('Groups')
        if not group_rows:
            log.debug(
                'Data returned from provider does not contain any torrents')
            return items

        for group in group_rows:
            torrent_rows = group.get('Torrents')
            if not torrent_rows:
                continue

            for row in torrent_rows:
                properties_string = row.get('Property').rstrip(' |').replace(
                    ' ', '')
                # Hack for the h264 10bit stuff
                properties_string = properties_string.replace(
                    'h26410-bit', 'h264|hi10p')
                properties = properties_string.split('|')
                download_url = row.get('Link')
                # Skip rows missing the download link or any property field.
                # (This previously used `or`, which let rows without a
                # download link through whenever the properties were intact.)
                if not (download_url and all(properties)):
                    continue

                # Get rid of freeleech from properties
                if properties[-1] == 'Freeleech':
                    del properties[-1]
                elif self.freeleech:
                    # Discard if we wanted free leech
                    continue

                tags = '{torrent_source}.{torrent_container}.{torrent_codec}.{torrent_res}.' \
                       '{torrent_audio}'.format(torrent_source=properties[0],
                                                torrent_container=properties[1],
                                                torrent_codec=properties[2],
                                                torrent_res=properties[3],
                                                torrent_audio=properties[4])

                last_field = re.match(r'(.*)\((.*)\)', properties[-1])

                # subs = last_field.group(1) if last_field else ''
                release_group = '-{0}'.format(
                    last_field.group(2)) if last_field else ''

                release_type = OTHER
                season = None
                episode = None
                multi_ep_start = None
                multi_ep_end = None
                title = None

                # Attempt and get a season or episode number
                title_info = row.get('EditionData').get('EditionTitle')

                if title_info != '':
                    if title_info.startswith('Episodes'):
                        multi_ep_match = re.match(r'Episodes (\d+)-(\d+)',
                                                  title_info)
                        if multi_ep_match:
                            multi_ep_start = multi_ep_match.group(1)
                            multi_ep_end = multi_ep_match.group(2)
                        release_type = MULTI_EP
                    elif title_info.startswith('Episode'):
                        episode = re.match('^Episode.([0-9]+)',
                                           title_info).group(1)
                        release_type = SINGLE_EP

                        season_match = re.match(r'.+[sS]eason.(\d+)$',
                                                group.get('SeriesName'))
                        if season_match:
                            season = season_match.group(1)
                    elif title_info.startswith('Season'):
                        if re.match(r'Season.[0-9]+-[0-9]+.\([0-9-]+\)',
                                    title_info):
                            # We can read the season AND the episodes, but we can only process multiep.
                            # So i've chosen to use it like 12-23 or 1-12.
                            match = re.match(
                                r'Season.([0-9]+)-([0-9]+).\(([0-9-]+)\)',
                                title_info)
                            episode = match.group(3).upper()
                            season = '{0}-{1}'.format(match.group(1),
                                                      match.group(2))
                            release_type = MULTI_SEASON
                        else:
                            season = re.match('Season.([0-9]+)',
                                              title_info).group(1)
                            release_type = SEASON_PACK
                elif group.get('EpCount') > 0 and group.get(
                        'GroupName') != 'TV Special':
                    # This is a season pack.
                    # 13 episodes -> SXXEXX-EXX
                    episode = int(group.get('EpCount'))
                    multi_ep_start = 1
                    multi_ep_end = episode
                    # Because we sometime get names without a season number, like season scene exceptions.
                    # This is the most reliable way of creating a multi-episode release name.
                    release_type = MULTI_EP

                # These are probably specials which we just can't handle anyways
                if release_type == OTHER:
                    continue

                if release_type == SINGLE_EP:
                    # Create the single episode release_name (use the shows default title)
                    if is_season_exception(group.get('SeriesName')):
                        # If this is a season exception, we can't parse the release name like:
                        #  Show.Title.Season.3.Exception.S01E01...
                        # As that will confuse the parser, as it already has a season available.
                        # We have to omit the season, to have it search for a season exception.
                        title = '{title}.{episode}.{tags}' \
                                '{release_group}'.format(title=group.get('SeriesName'),
                                                         episode='E{0:02d}'.format(int(episode)),
                                                         tags=tags,
                                                         release_group=release_group)
                    else:
                        title = '{title}.{season}.{episode}.{tags}' \
                                '{release_group}'.format(title=group.get('SeriesName'),
                                                         season='S{0:02d}'.format(int(season)) if season else 'S01',
                                                         episode='E{0:02d}'.format(int(episode)),
                                                         tags=tags,
                                                         release_group=release_group)
                if release_type == MULTI_EP:
                    # Create the multi-episode release_name
                    # Multiple.Episode.TV.Show.SXXEXX-EXX[Episode.Part].[Episode.Title].TAGS.[LANGUAGE].720p.FORMAT.x264-GROUP
                    if is_season_exception(group.get('SeriesName')):
                        # If this is a season exception, we can't parse the release name like:
                        #  Show.Title.Season.3.Exception.S01E01-E13...
                        # As that will confuse the parser, as it already has a season available.
                        # We have to omit the season, to have it search for a season exception.
                        # Example: Show.Title.Season.3.Exception.E01-E13...
                        title = '{title}.{multi_episode_start}-{multi_episode_end}.{tags}' \
                                '{release_group}'.format(title=group.get('SeriesName'),
                                                         multi_episode_start='E{0:02d}'.format(int(multi_ep_start)),
                                                         multi_episode_end='E{0:02d}'.format(int(multi_ep_end)),
                                                         tags=tags,
                                                         release_group=release_group)
                    else:
                        # Note: `int(season)` matches the other branches; the
                        # `d` format spec raises ValueError on a string.
                        title = '{title}.{season}{multi_episode_start}-{multi_episode_end}.{tags}' \
                                '{release_group}'.format(title=group.get('SeriesName'),
                                                         season='S{0:02d}'.format(int(season)) if season else 'S01',
                                                         multi_episode_start='E{0:02d}'.format(int(multi_ep_start)),
                                                         multi_episode_end='E{0:02d}'.format(int(multi_ep_end)),
                                                         tags=tags,
                                                         release_group=release_group)
                if release_type == SEASON_PACK:
                    # Create the season pack release_name
                    # if `Season` is already in the SeriesName, we ommit adding it another time.
                    title = '{title}.{season}.{tags}' \
                        '{release_group}'.format(title=group.get('SeriesName'),
                                                 season='S{0:02d}'.format(int(season)) if season else 'S01',
                                                 tags=tags,
                                                 release_group=release_group)

                if release_type == MULTI_SEASON:
                    # Create the multi season pack release_name
                    # Multiple.Episode.TV.Show.EXX-EXX[Episode.Part].[Episode.Title].TAGS.[LANGUAGE].720p.FORMAT.x264-GROUP
                    title = '{title}.{episode}.{tags}' \
                            '{release_group}'.format(title=group.get('SeriesName'),
                                                     episode=episode,
                                                     tags=tags,
                                                     release_group=release_group)

                seeders = row.get('Seeders')
                leechers = row.get('Leechers')
                pubdate = self.parse_pubdate(row.get('UploadTime'))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}', title,
                            seeders)
                    continue

                size = convert_size(row.get('Size'), default=-1)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }

                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)

        return items
Example #27
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        # Units
        units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

        def process_column_header(td):
            """Return the column label from the cell's image title or text."""
            result = ''
            if td.a and td.a.img:
                result = td.a.img.get('title', td.a.get_text(strip=True))
            if not result:
                result = td.get_text(strip=True)
            return result

        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('div', id='torrentBrowse')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            labels = [process_column_header(label) for label in torrent_rows[0]('td')]

            # Skip column headers
            for row in torrent_rows[1:]:
                cells = row('td')
                if len(cells) < len(labels):
                    continue

                try:

                    title_anchor = cells[labels.index('Name')].find('a').find_next('a') or \
                        cells[labels.index('Name')].find('a')
                    title = title_anchor.get('title') if title_anchor else None
                    download_url = urljoin(self.url, cells[labels.index('DL')].find('a')['href'])
                    if not all([title, download_url]):
                        continue

                    peers = cells[labels.index('S/L')].get_text(strip=True).split('/', 1)
                    seeders = try_int(peers[0])
                    leechers = try_int(peers[1])

                    # Filter unseeded torrent
                    # (Previously `seeders < min(self.minseed, 1)`, which
                    # capped the configured minimum at 1 and effectively
                    # disabled the user's minseed setting.)
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      " minimum seeders: {0}. Seeders: {1}",
                                      title, seeders)
                        continue

                    torrent_size = cells[labels.index('Size/Snatched')].get_text(strip=True).split('/', 1)[0]
                    size = convert_size(torrent_size, units=units) or -1

                    pubdate_raw = cells[labels.index('Added')].get_text(' ')
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    # log.exception records the traceback for us, matching
                    # the other providers' parsers.
                    log.exception('Failed parsing provider.')

        return items
Example #28
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', {'id': 'torrentsTable'})
            # Default to an empty list: previously `torrent_rows` was only
            # assigned when the table existed, raising UnboundLocalError on
            # pages without a results table.
            torrent_rows = torrent_table.find_all('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Skip column headers
            for row in torrent_rows[1:]:
                try:
                    torrent_items = row.find_all('td')
                    title = torrent_items[1].find('a').get_text(strip=True)
                    download_url = torrent_items[2].find('a')['href']
                    if not all([title, download_url]):
                        continue
                    download_url = urljoin(self.url, download_url)

                    seeders = try_int(torrent_items[5].get_text(strip=True))
                    leechers = try_int(torrent_items[6].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    torrent_size = torrent_items[4].get_text()
                    size = convert_size(torrent_size) or -1

                    pubdate_raw = torrent_items[1].find('div').get_text()
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider')

        return items
Example #29
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            # Continue only if at least one release is found
            empty = html.find('h2',
                              text='No .torrents fit this filter criteria')
            if empty:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            torrent_table = html.find(
                'table', attrs={'style': 'border: none; width: 100%;'})
            torrent_rows = torrent_table(
                'tr', class_='browse') if torrent_table else []

            for row in torrent_rows:
                cells = row('td')

                try:
                    title = cells[1].find('a').get('title')
                    torrent_url = cells[2].find('a').get('href')
                    # Validate before building the absolute URL: urljoin
                    # raises TypeError on a None href, which was previously
                    # logged as a parsing failure instead of being skipped.
                    if not all([title, torrent_url]):
                        continue
                    download_url = urljoin(self.url, torrent_url)

                    seeders = try_int(cells[9].get_text(), 1)
                    leechers = try_int(cells[10].get_text())

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                ' minimum seeders: {0}. Seeders: {1}', title,
                                seeders)
                        continue

                    torrent_size = self._norm_size(
                        cells[7].get_text(strip=True))
                    size = convert_size(torrent_size) or -1

                    pubdate_raw = cells[5].get_text()
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #30
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            if not html:
                log.debug('No html data parsed from provider')
                return items

            torrents = html('tr')
            if not torrents or len(torrents) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Skip column headers
            for row in torrents[1:]:
                # Skip extraneous rows at the end
                if len(row.contents) < 10:
                    continue

                try:
                    comments_counter = row.find_all('td', class_='lista', attrs={'align': 'center'})[4].find('a')
                    if comments_counter:
                        # Assumes a fixed 10-character prefix in the link
                        # title before the release name — TODO confirm.
                        title = comments_counter['title'][10:]
                    else:
                        title = row.find('td', class_='lista', attrs={'align': 'left'}).find('a').get_text()
                    dl_href = row.find('td', class_='lista', attrs={'width': '20',
                                                                    'style': 'text-align: center;'}).find('a').get('href')
                    # Validate before joining: urljoin raises TypeError on a
                    # None href, which was previously logged as a parsing
                    # failure instead of being skipped.
                    if not all([title, dl_href]):
                        continue
                    download_url = urljoin(self.url, dl_href)

                    seeders = try_int(row.find('span', class_='seedy').find('a').get_text(), 1)
                    leechers = try_int(row.find('span', class_='leechy').find('a').get_text())

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    torrent_size = row.find('td', class_='lista222', attrs={'width': '100%'}).get_text()
                    size = convert_size(torrent_size) or -1

                    pubdate_td = row.find_all('td', class_='lista', attrs={'align': 'center'})[3]
                    pubdate_human_offset = pubdate_td.find('b')
                    if pubdate_human_offset:
                        time_search = re.search('([0-9:]+)', pubdate_td.get_text())
                        pubdate_raw = pubdate_human_offset.get_text() + ' at ' + time_search.group(1)
                    else:
                        pubdate_raw = pubdate_td.get_text()

                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    # Only log found results outside RSS mode, consistent
                    # with the other providers' parsers.
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #31
0
    def search(self, search_strings, age=0, ep_obj=None):
        """
        Search indexer using the params in search_strings, either for latest releases, or a string/id search.

        :return: list of results in dict form
        """
        results = []
        if not self._check_auth():
            return results

        # For providers that don't have caps, or for which the t=caps is not working.
        if not self.caps and all(provider not in self.url
                                 for provider in self.providers_without_caps):
            self.get_newznab_categories(just_caps=True)
            if not self.caps:
                return results

        for mode in search_strings:
            self.torznab = False
            search_params = {
                't': 'search',
                'limit': 100,
                'offset': 0,
                'cat': self.cat_ids.strip(', ') or '5030,5040',
                'maxage': app.USENET_RETENTION
            }

            if self.needs_auth and self.key:
                search_params['apikey'] = self.key

            if mode != 'RSS':
                match_indexer = self._match_indexer()
                search_params[
                    't'] = 'tvsearch' if match_indexer and not self.force_query else 'search'

                if search_params['t'] == 'tvsearch':
                    search_params.update(match_indexer)

                    if ep_obj.series.air_by_date or ep_obj.series.sports:
                        date_str = str(ep_obj.airdate)
                        search_params['season'] = date_str.partition('-')[0]
                        search_params['ep'] = date_str.partition(
                            '-')[2].replace('-', '/')
                    else:
                        search_params['season'] = ep_obj.scene_season
                        search_params['ep'] = ep_obj.scene_episode

                if mode == 'Season':
                    search_params.pop('ep', '')

            items = []
            log.debug('Search mode: {0}', mode)

            for search_string in search_strings[mode]:

                if mode != 'RSS':
                    # If its a PROPER search, need to change param to 'search' so it searches using 'q' param
                    if any(proper_string in search_string
                           for proper_string in self.proper_strings):
                        search_params['t'] = 'search'

                    log.debug(
                        'Search show using {search}', {
                            'search':
                            'search string: {search_string}'.format(
                                search_string=search_string
                                if search_params['t'] != 'tvsearch' else
                                'indexer_id: {indexer_id}'.format(
                                    indexer_id=match_indexer))
                        })

                    if search_params['t'] != 'tvsearch':
                        search_params['q'] = search_string

                time.sleep(cpu_presets[app.CPU_PRESET])

                response = self.get_url(urljoin(self.url, 'api'),
                                        params=search_params,
                                        returns='response')
                if not response or not response.text:
                    log.debug('No data returned from provider')
                    continue

                with BS4Parser(response.text, 'html5lib') as html:
                    if not self._check_auth_from_data(html):
                        return items

                    try:
                        self.torznab = 'xmlns:torznab' in html.rss.attrs
                    except AttributeError:
                        self.torznab = False

                    if not html('item'):
                        log.debug(
                            'No results returned from provider. Check chosen Newznab search categories'
                            ' in provider settings and/or usenet retention')
                        continue

                    for item in html('item'):
                        try:
                            title = item.title.get_text(strip=True)
                            download_url = None
                            if item.link:
                                if validators.url(
                                        item.link.get_text(strip=True)):
                                    download_url = item.link.get_text(
                                        strip=True)
                                elif validators.url(item.link.next.strip()):
                                    download_url = item.link.next.strip()

                            if not download_url and item.enclosure:
                                if validators.url(
                                        item.enclosure.get('url', '').strip()):
                                    download_url = item.enclosure.get(
                                        'url', '').strip()

                            if not (title and download_url):
                                continue

                            seeders = leechers = -1
                            if 'gingadaddy' in self.url:
                                size_regex = re.search(r'\d*.?\d* [KMGT]B',
                                                       str(item.description))
                                item_size = size_regex.group(
                                ) if size_regex else -1
                            else:
                                item_size = item.size.get_text(
                                    strip=True) if item.size else -1
                                for attr in item('newznab:attr') + item(
                                        'torznab:attr'):
                                    item_size = attr['value'] if attr[
                                        'name'] == 'size' else item_size
                                    seeders = try_int(
                                        attr['value']
                                    ) if attr['name'] == 'seeders' else seeders
                                    peers = try_int(
                                        attr['value']
                                    ) if attr['name'] == 'peers' else None
                                    leechers = peers - seeders if peers else leechers

                            if not item_size or (self.torznab and
                                                 (seeders is -1
                                                  or leechers is -1)):
                                continue

                            size = convert_size(item_size) or -1

                            pubdate_raw = item.pubdate.get_text(strip=True)
                            pubdate = self.parse_pubdate(pubdate_raw)

                            item = {
                                'title': title,
                                'link': download_url,
                                'size': size,
                                'seeders': seeders,
                                'leechers': leechers,
                                'pubdate': pubdate,
                            }
                            if mode != 'RSS':
                                if seeders == -1:
                                    log.debug('Found result: {0}', title)
                                else:
                                    log.debug(
                                        'Found result: {0} with {1} seeders and {2} leechers',
                                        title, seeders, leechers)

                            items.append(item)
                        except (AttributeError, TypeError, KeyError,
                                ValueError, IndexError):
                            log.error(
                                'Failed parsing provider. Traceback: {0!r}',
                                traceback.format_exc())
                            continue

                # Since we arent using the search string,
                # break out of the search string loop
                if 'tvdbid' in search_params:
                    break

            results += items

        # Reproces but now use force_query = True
        if not results and not self.force_query:
            self.force_query = True
            return self.search(search_strings, ep_obj=ep_obj)

        return results
Example #32
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        rows = data.get('data')
        if not rows:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for result in rows:
            name = result.get('name', '')
            result_id = result.get('id', '')
            # Build an authenticated download link from the torrent id and passkey.
            query = urlencode({'id': result_id, 'passkey': self.passkey})
            url = self.urls['download'].format(query)

            if not all([name, url]):
                continue

            seeders = result.get('seeders', 1)
            leechers = result.get('leechers', 0)

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(result.get('size'), default=-1)

            pubdate = self.parse_pubdate(result.get('added'))

            if mode != 'RSS':
                log.debug(
                    'Found result: {title} with {x} seeders'
                    ' and {y} leechers', {
                        'title': name,
                        'x': seeders,
                        'y': leechers
                    }
                )

            items.append({
                'title': name,
                'link': url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            })

        return items
Example #33
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        for row in data:

            try:
                # Check if this is a freeleech torrent and if we've configured to only allow freeleech.
                if self.freeleech and row.get('download-multiplier') != 0:
                    continue

                # Strip BBCode-style tags (e.g. [img=...]...[/...]) from the name.
                title = re.sub(r'\[.*\=.*\].*\[/.*\]', '',
                               row['name']) if row['name'] else None
                download_url = urljoin(
                    self.urls['download'], '{0}/{1}.torrent'.format(
                        row['t'],
                        row['name'])) if row['t'] and row['name'] else None

                if not all([title, download_url]):
                    continue

                seeders = int(row['seeders'])
                leechers = int(row['leechers'])

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}', title,
                            seeders)
                    continue

                torrent_size = row['size']
                size = convert_size(torrent_size) or -1

                # 'ctime' holds a unix timestamp (parse_pubdate is told so).
                pubdate_raw = row['ctime']
                pubdate = self.parse_pubdate(pubdate_raw, fromtimestamp=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                # Fixed: this message was previously logged twice per result
                # (once before size conversion and again here).
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError,
                    IndexError):
                log.exception('Failed parsing provider.')

        return items
Example #34
0
    def parse(self, data, mode, **kwargs):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS.

        :return: A list of items found
        """
        items = []

        keywords = kwargs.pop('keywords', None)

        with BS4Parser(data, 'html5lib') as html:
            table = html.find(id='sortabletable')
            rows = table('tr') if table else []

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            # Header cells: prefer the image tooltip, fall back to cell text.
            labels = []
            for cell in rows[0]('td'):
                labels.append(cell.img['title'] if cell.img
                              else cell.get_text(strip=True))

            for row in rows[1:]:
                try:
                    # Honour the freeleech-only setting.
                    if self.freeleech and not row.find(
                            'img',
                            alt=re.compile('TORRENT GRATUIT : Seulement '
                                           'l\'upload sera compter.')):
                        continue

                    title = row.find(
                        class_='tooltip-content').div.get_text(strip=True)
                    download_url = row.find(
                        title='Télécharger le torrent!').parent['href']
                    if not all([title, download_url]):
                        continue

                    # Chop off tracker/channel prefix or we cannot parse the result!
                    if mode != 'RSS' and keywords:
                        first_word = re.search(r'^[^ .]+', keywords).group()
                        if not title.startswith(first_word):
                            title = re.sub(
                                r'.*(' + first_word + '.*)', r'\1', title)

                    seeders = try_int(
                        row.find(title='Seeders').get_text(strip=True))
                    leechers = try_int(
                        row.find(title='Leechers').get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < min(self.minseed, 1):
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                " minimum seeders: {0}. Seeders: {1}", title,
                                seeders)
                        continue

                    cells = row('td')
                    torrent_size = cells[labels.index('Taille')].get_text(
                        strip=True)
                    size = convert_size(torrent_size) or -1

                    # Publish date lives in the last <div> of the name column.
                    pubdate_raw = cells[labels.index('Nom')].find_all(
                        'div')[-1].get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw, dayfirst=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.error('Failed parsing provider. Traceback: {0!r}',
                              traceback.format_exc())

        return items
Example #35
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            rows = html('tr')

            if not rows or not len(rows) > 1:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Cat., Active, Filename, Dl, Wl, Added, Size, Uploader, S, L, C
            labels = []
            for header in rows[0]('th'):
                source = header.a if header.a else header
                labels.append(source.get_text(strip=True))

            # Skip column headers
            for row in rows[1:]:
                try:
                    cells = row.find_all('td', recursive=False)[:len(labels)]
                    if len(cells) < len(labels):
                        continue

                    name_cell = cells[labels.index('Torrent name')].a
                    title = name_cell.get_text(strip=True) if name_cell else None
                    download_url = name_cell.get('href') if name_cell else None
                    if not all([title, download_url]):
                        continue

                    # 'S' column packs seeders/leechers/completed as "s/l/c".
                    slc = cells[labels.index('S')].get_text()
                    if slc:
                        seeders, leechers, _ = [int(part.strip())
                                                for part in slc.split('/')]
                    else:
                        seeders, leechers, _ = 0, 0, 0

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(
                        cells[labels.index('Size')].get_text()) or -1

                    pubdate = self.parse_pubdate(
                        cells[labels.index('Added')].get_text())

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #36
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A KV with a list of items found and if there's an next page to search
        """
        def process_column_header(td):
            # Prefer the image tooltip inside the header link; fall back to
            # the link text, then to the cell's own text.
            ret = ''
            if td.a and td.a.img:
                ret = td.a.img.get('title', td.a.get_text(strip=True))
            if not ret:
                ret = td.get_text(strip=True)
            return ret

        items = []
        has_next_page = False
        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', id='torrent_table')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # ignore next page in RSS mode
            has_next_page = mode != 'RSS' and html.find('a', class_='pager_next') is not None
            log.debug('Are there more pages? {0}'.format(has_next_page))

            # Continue only if at least one Release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return {'has_next_page': has_next_page, 'items': []}

            # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers'
            labels = [process_column_header(label) for label in torrent_rows[0]('td')]
            group_title = ''

            # Skip column headers
            for result in torrent_rows[1:]:
                cells = result('td')
                # NOTE(review): result.get('class') returns None when a row has
                # no class attribute; 'in None' below would raise TypeError
                # OUTSIDE the try — assumes every data row carries a class.
                result_class = result.get('class')
                # When "Grouping Torrents" is enabled, the structure of table change
                # (grouped rows drop two leading columns, hence the -2 offset).
                group_index = -2 if 'group_torrent' in result_class else 0
                try:
                    title = result.select('a[href^="torrents.php?id="]')[0].get_text()
                    title = re.sub(r'\s+', ' ', title).strip()  # clean empty lines and multiple spaces

                    if 'group' in result_class or 'torrent' in result_class:
                        # get international title if available
                        title = re.sub(r'.* \[(.*?)\](.*)', r'\1\2', title)

                    if 'group' in result_class:
                        # Group header row: remember the title for the grouped
                        # torrent rows that follow, emit nothing for it.
                        group_title = title
                        continue

                    for serie in self.absolute_numbering:
                        if serie in title:
                            # remove season from title when its in absolute format
                            title = re.sub(r'S\d{2}E(\d{2,4})', r'\1', title)
                            break

                    download_url = urljoin(self.url, result.select('a[href^="torrents.php?action=download"]')[0]['href'])
                    if not all([title, download_url]):
                        continue

                    seeders = try_int(cells[labels.index('Seeders') + group_index].get_text(strip=True))
                    leechers = try_int(cells[labels.index('Leechers') + group_index].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    torrent_details = None
                    if 'group_torrent' in result_class:
                        # torrents belonging to a group
                        torrent_details = title
                        title = group_title
                    elif 'torrent' in result_class:
                        # standalone/un grouped torrents
                        torrent_details = cells[labels.index('Nome/Ano')].find('div', class_='torrent_info').get_text()

                    # If neither branch above matched, torrent_details is still
                    # None and the .replace below raises AttributeError, which
                    # the except clause swallows (row is skipped).
                    torrent_details = torrent_details.replace('[', ' ').replace(']', ' ').replace('/', ' ')
                    # Normalize Portuguese quality labels to scene-style tokens.
                    torrent_details = torrent_details.replace('Full HD ', '1080p').replace('HD ', '720p')

                    torrent_size = cells[labels.index('Tamanho') + group_index].get_text(strip=True)
                    size = convert_size(torrent_size) or -1

                    torrent_name = '{0} {1}'.format(title, torrent_details.strip()).strip()
                    torrent_name = re.sub(r'\s+', ' ', torrent_name)

                    items.append({
                        'title': torrent_name,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None
                    })

                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers'.format
                                  (torrent_name, seeders, leechers))

                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return {'has_next_page': has_next_page, 'items': items}
Example #37
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        # Use html.parser, since html5parser has issues with this site.
        with BS4Parser(data, 'html.parser') as html:
            # Get the last table with a width of 800px.
            tables = html('table', width='800')
            torrent_table = tables[-1] if tables else []
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug(
                    'Data returned from provider does not contain any torrents'
                )
                return items

            # Skip column headers
            for row in torrent_rows[1:]:
                cells = row('td')
                # We must have cells[2] because it contains the title
                if len(cells) < 3:
                    continue

                # Freeleech rows are marked with a bgcolor attribute.
                if self.freeleech and not row.get('bgcolor'):
                    continue

                try:
                    title = cells[2].find('a')['title'] if cells[2] else None
                    if cells[0]:
                        download_url = urljoin(self.url,
                                               cells[0].find('a')['href'])
                    else:
                        download_url = None
                    if not all([title, download_url]):
                        continue

                    seeders = 1
                    if len(cells) > 8:
                        seeders = try_int(cells[8].get_text(strip=True))
                    leechers = 0
                    if len(cells) > 9:
                        leechers = try_int(cells[9].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < min(self.minseed, 1):
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                " minimum seeders: {0}. Seeders: {1}", title,
                                seeders)
                        continue

                    torrent_size = cells[6].get_text(' ') if len(cells) > 6 else None
                    size = convert_size(torrent_size) or -1

                    pubdate = self.parse_pubdate(cells[5].get_text(' '))

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.error('Failed parsing provider. Traceback: {0!r}',
                              traceback.format_exc())

        return items
Example #38
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            rows = html.find_all('tr')

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Scenetime apparently uses different number of cells in #torrenttable based
            # on who you are. This works around that by extracting labels from the first
            # <tr> and using their index to find the correct download/seeders/leechers td.
            labels = []
            for header in rows[0]('td'):
                labels.append(header.get_text(strip=True) or header.img['title'])

            # Skip column headers
            for row in rows[1:]:
                cells = row('td')
                if len(cells) < len(labels):
                    continue

                try:
                    link = cells[labels.index('Name')].find('a')
                    torrent_id = link['href'].replace('details.php?id=', '').split('&')[0]
                    title = link.get_text(strip=True)
                    filename = '{0}.torrent'.format(title.replace(' ', '.'))
                    download_url = self.urls['download'].format(torrent_id, filename)
                    if not all([title, download_url]):
                        continue

                    seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                    leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < min(self.minseed, 1):
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      " minimum seeders: {0}. Seeders: {1}",
                                      title, seeders)
                        continue

                    # Insert a space between the number and its unit so
                    # convert_size can parse it.
                    torrent_size = cells[labels.index('Size')].get_text()
                    torrent_size = re.sub(r'(\d+\.?\d*)', r'\1 ', torrent_size)
                    size = convert_size(torrent_size) or -1

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.error('Failed parsing provider. Traceback: {0!r}',
                              traceback.format_exc())

        return items
Example #39
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:

            rows = html('item')
            if not rows:
                log.debug(
                    'No results returned from provider. Check chosen Newznab search categories'
                    ' in provider settings and/or usenet retention')
                return items

            # A torznab feed declares the xmlns:torznab namespace on its
            # <rss> root element; a plain newznab feed does not.
            try:
                self.torznab = 'xmlns:torznab' in html.rss.attrs
            except AttributeError:
                self.torznab = False

            for item in rows:
                try:
                    title = item.title.get_text(strip=True)
                    download_url = None

                    # Prefer the <enclosure> URL; magnet links pass through
                    # unchanged, http(s) URLs must validate.
                    if item.enclosure:
                        url = item.enclosure.get('url', '').strip()
                        if url.startswith('magnet:'):
                            download_url = url
                        elif validators.url(url):
                            download_url = url
                            # Jackett needs extension added (since v0.8.396)
                            if not url.endswith('.torrent'):
                                content_type = item.enclosure.get('type', '')
                                if content_type == 'application/x-bittorrent':
                                    download_url = '{0}{1}'.format(url, '.torrent')

                    # Fall back to <link>: first its text content, then the
                    # text node immediately following the tag.
                    if not download_url and item.link:
                        url = item.link.get_text(strip=True)
                        if validators.url(url) or url.startswith('magnet:'):
                            download_url = url

                        if not download_url:
                            url = item.link.next.strip()
                            if validators.url(url) or url.startswith('magnet:'):
                                download_url = url

                    if not (title and download_url):
                        continue

                    # -1 marks seeders/leechers as unknown (plain newznab
                    # feeds carry no peer info).
                    seeders = leechers = -1
                    if 'gingadaddy' in self.url:
                        # Gingadaddy exposes no size attribute; scrape a
                        # "<number> <unit>B" token out of the description.
                        size_regex = re.search(r'\d*.?\d* [KMGT]B', str(item.description))
                        item_size = size_regex.group() if size_regex else -1
                    else:
                        item_size = item.size.get_text(strip=True) if item.size else -1
                        # Use regex to find name-spaced tags
                        # see BeautifulSoup4 bug 1720605
                        # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                        newznab_attrs = item(re.compile('newznab:attr'))
                        torznab_attrs = item(re.compile('torznab:attr'))
                        for attr in newznab_attrs + torznab_attrs:
                            item_size = attr['value'] if attr['name'] == 'size' else item_size
                            seeders = try_int(attr['value']) if attr['name'] == 'seeders' else seeders
                            peers = try_int(attr['value']) if attr['name'] == 'peers' else None
                            leechers = peers - seeders if peers else leechers

                    # Torznab results without both peer counts are unusable.
                    if not item_size or (self.torznab and (seeders == -1 or leechers == -1)):
                        continue

                    size = convert_size(item_size) or -1

                    pubdate_raw = item.pubdate.get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw)

                    # NOTE: rebinding the loop variable here is safe — it is
                    # reassigned at the top of the next iteration.
                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        if seeders == -1:
                            log.debug('Found result: {0}', title)
                        else:
                            log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                      title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #40
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            table = html.find('table', border='1')
            rows = table('tr') if table else []

            # Continue only if at least one release is found
            if len(rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # "Type", "Name", Files", "Comm.", "Added", "TTL", "Size", "Snatched", "Seeders", "Leechers"
            labels = [header.get_text(strip=True) for header in rows[0]('td')]

            # Skip column headers
            for row in rows[1:]:
                cells = row('td')

                if len(cells) < len(labels):
                    continue

                try:
                    name_cell = cells[labels.index('Name')]
                    download_anchor = name_cell.find(
                        'a', href=re.compile(r'download.php\?id='))
                    download_url = urljoin(self.url, download_anchor['href'])
                    details_anchor = name_cell.find(
                        'a', href=re.compile(r'details.php\?id='))
                    title = (details_anchor.get('title', '')
                             or details_anchor.get_text(strip=True))
                    if not all([title, download_url]):
                        continue

                    # Free leech torrents are marked with green [F L] in the title
                    # (i.e. <font color=green>[F&nbsp;L]</font>)
                    freeleech = name_cell.find('font', color='green')
                    if freeleech:
                        # \xa0 is a non-breaking space in Latin1 (ISO 8859-1)
                        freeleech_tag = '[F\xa0L]'
                        title = title.replace(freeleech_tag, '')
                        if self.freeleech and freeleech.get_text(strip=True) != freeleech_tag:
                            continue

                    seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True), 1)
                    leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    size = convert_size(
                        cells[labels.index('Size')].get_text(' ', strip=True)) or -1

                    pubdate = self.parse_pubdate(
                        cells[labels.index('Added')].get_text(' ', strip=True))

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #41
0
def test_convert_size():
    """Exercise ``convert_size`` across formats, defaults, units and separators."""
    # converts pretty file sizes to integers
    assert sut.convert_size('1 B') == 1
    assert sut.convert_size('1 KB') == 1024
    # can use decimal units (e.g. KB = 1000 bytes instead of 1024)
    assert sut.convert_size('1 kb', use_decimal=True) == 1000

    # returns integer sizes for integers
    assert sut.convert_size(0, -1) == 0
    assert sut.convert_size(100, -1) == 100
    # returns integer sizes for floats too
    assert sut.convert_size(1.312, -1) == 1
    # return integer variant when passed as str
    assert sut.convert_size('1024', -1) == 1024

    # without a default value, failures return None
    assert sut.convert_size('pancakes') is None

    # default value can be anything
    assert sut.convert_size(None, -1) == -1
    assert sut.convert_size('', 3.14) == 3.14
    assert sut.convert_size('elephant', 'frog') == 'frog'

    # negative sizes return 0
    assert sut.convert_size(-1024, -1) == 0
    assert sut.convert_size('-1 GB', -1) == 0

    # can also use `or` for a default value
    # NOTE: the parentheses are required -- `==` binds tighter than `or`, so
    # `x or 100 == 100` would parse as `x or (100 == 100)`, which is always
    # truthy and asserts nothing.
    assert (sut.convert_size(None) or 100) == 100
    # default doesn't have to be integer
    assert (sut.convert_size(None) or 1.61803) == 1.61803
    # default doesn't have to be numeric either
    assert (sut.convert_size(None) or '100') == '100'
    # can use `or` to provide a default when size evaluates to 0
    assert (sut.convert_size('-1 GB') or -1) == -1

    # default units can be kwarg'd
    assert sut.convert_size('1', default_units='GB') == sut.convert_size('1 GB')

    # separator can be kwarg'd
    assert sut.convert_size('1?GB', sep='?') == sut.convert_size('1 GB')

    # can use custom dictionary to support internationalization
    french = ['O', 'KO', 'MO', 'GO', 'TO', 'PO']
    assert sut.convert_size('1 o', units=french) == 1
    assert sut.convert_size('1 go', use_decimal=True, units=french) == 1000000000
    assert sut.convert_size('1 o') is None  # Wrong units so result is None

    # custom units need to be uppercase or they won't match
    oops = ['b', 'kb', 'Mb', 'Gb', 'tB', 'Pb']
    assert sut.convert_size('1 b', units=oops) is None
    assert sut.convert_size('1 B', units=oops) is None
    assert sut.convert_size('1 Mb', units=oops) is None
    assert sut.convert_size('1 MB', units=oops) is None

    # utilize the regex to parse sizes without separator
    assert sut.convert_size('1GB', sep='') == 1073741824
    assert sut.convert_size('1.00GB', sep='') == 1073741824
    assert sut.convert_size('1.01GB', sep='') == 1084479242
    assert sut.convert_size('1B', sep='') == 1

    # no separator and custom units
    french = ['O', 'KO', 'MO', 'GO', 'TO', 'PO']
    assert sut.convert_size('1Go', sep='', units=french) == 1073741824
    assert sut.convert_size('1.00Go', sep='', units=french) == 1073741824
    assert sut.convert_size('1.01Go', sep='', units=french) == 1084479242
    assert sut.convert_size('1o', sep='', units=french) == 1

    # no separator, custom units need to be uppercase or they won't match
    oops = ['b', 'kb', 'Mb', 'Gb', 'tB', 'Pb']
    assert sut.convert_size('1b', sep='', units=oops) is None
    assert sut.convert_size('1B', sep='', units=oops) is None
    assert sut.convert_size('1Mb', sep='', units=oops) is None
    assert sut.convert_size('1MB', sep='', units=oops) is None
Example #42
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:

            rows = html('item')
            if not rows:
                log.debug(
                    'No results returned from provider. Check chosen Torznab search categories '
                    'in provider settings.')
                return items

            # Use a loop name distinct from the result dict to avoid shadowing.
            for row in rows:
                try:
                    title = row.title.get_text(strip=True)
                    download_url = row.enclosure.get('url')
                    if not all([title, download_url]):
                        continue

                    seeders_attr = row.find('torznab:attr',
                                            attrs={'name': 'seeders'})
                    peers_attr = row.find('torznab:attr',
                                          attrs={'name': 'peers'})
                    # Missing seeders attribute defaults to 1 so the result
                    # is not discarded by the minseed filter below.
                    seeders = int(seeders_attr.get('value',
                                                   0)) if seeders_attr else 1
                    peers = int(peers_attr.get('value',
                                               0)) if peers_attr else 0
                    # Torznab 'peers' counts seeders too; clamp at 0 so
                    # leechers is never negative.
                    leechers = max(peers - seeders, 0)

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug(
                                "Discarding torrent because it doesn't meet the"
                                ' minimum seeders: {0}. Seeders: {1}', title,
                                seeders)
                        continue

                    torrent_size = row.size.get_text(strip=True)
                    size = convert_size(torrent_size, default=-1)

                    pubdate_raw = row.pubdate.get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #43
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as soup:
            torrent_table = soup.find('table', class_='listing')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # The first row may be a header row (a single cell); skip it if so.
            a = 1 if len(torrent_rows[0]('td')) < 2 else 0

            # Each release spans two <tr> rows: description on top, stats below.
            for top, bot in zip(torrent_rows[a::2], torrent_rows[a + 1::2]):
                try:
                    desc_top = top.find('td', class_='desc-top')
                    title = desc_top.get_text(strip=True) if desc_top else None
                    download_url = desc_top.find('a')['href'] if desc_top else None
                    if not all([title, download_url]):
                        continue

                    stats = bot.find('td', class_='stats').get_text(strip=True)
                    sl = re.match(r'S:(?P<seeders>\d+)L:(?P<leechers>\d+)C:(?:\d+)ID:(?:\d+)', stats.replace(' ', ''))
                    seeders = try_int(sl.group('seeders')) if sl else 0
                    leechers = try_int(sl.group('leechers')) if sl else 0

                    # Filter unseeded torrent
                    # NOTE(review): min(self.minseed, 1) only ever rejects
                    # 0-seeder results, regardless of the configured minimum;
                    # presumably intentional for this tracker -- confirm.
                    if seeders < min(self.minseed, 1):
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      " minimum seeders: {0}. Seeders: {1}",
                                      title, seeders)
                        continue

                    desc_bottom = bot.find('td', class_='desc-bot').get_text(strip=True)
                    # str.strip('Size: ') would strip a *set* of characters
                    # ('S', 'i', 'z', 'e', ':', ' ') from both ends, not the
                    # literal label; remove the prefix explicitly instead.
                    size_field = desc_bottom.split('|')[1].replace('Size:', '', 1).strip()
                    size = convert_size(size_field) or -1

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #44
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        # Sizes on this site use binary (IEC) suffixes
        units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

        def process_column_header(th):
            # Prefer the link text inside the header cell, then the raw cell text.
            if th.a:
                text = th.a.get_text(strip=True)
                if text:
                    return text
            return th.get_text(strip=True)

        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', id='searchResult')
            torrent_rows = torrent_table('tr') if torrent_table else []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any {0}torrents',
                          'confirmed ' if self.confirmed else '')
                return items

            labels = [process_column_header(header) for header in torrent_rows[0]('th')]

            # Skip column headers
            for row in torrent_rows[1:]:
                cells = row('td')
                if len(cells) < len(labels):
                    continue

                try:
                    name_cell = row.find(class_='detName')
                    title = name_cell.get_text(strip=True) if name_cell else None
                    magnet = row.find(title='Download this torrent using magnet')
                    download_url = magnet['href'] + self._custom_trackers if magnet else None
                    if download_url and 'magnet:?' not in download_url:
                        log.debug('Invalid ThePirateBay proxy please try another one')
                        continue
                    if not all([title, download_url]):
                        continue

                    seeders = try_int(cells[labels.index('SE')].get_text(strip=True), 1)
                    leechers = try_int(cells[labels.index('LE')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    # Accept Torrent only from Good People for every Episode Search
                    if self.confirmed and not row.find(alt=re.compile(r'VIP|Trusted')):
                        if mode != 'RSS':
                            log.debug("Found result {0} but that doesn't seem like a trusted"
                                      " result so I'm ignoring it", title)
                        continue

                    # Convert size after all possible skip scenarios
                    desc = cells[labels.index('Name')].find(class_='detDesc')
                    raw_size = desc.get_text(strip=True).split(', ')[1]
                    raw_size = re.sub(r'Size ([\d.]+).+([KMGT]iB)', r'\1 \2', raw_size)
                    size = convert_size(raw_size, units=units) or -1

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': None,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #45
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:
            table_header = html.find('thead')

            # Continue only if at least one release is found
            if not table_header:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Column labels live on a <span title="..."> inside each <th>, e.g.:
            # [u'Category', u'', u'Filename', u'Comments', u'Torrent', u'Magnet',
            #  u'File size', u'Age', u'Seeders', u'Leechers', u'Completed']
            labels = []
            for header_cell in table_header.find_all('th'):
                labels.append(header_cell.span.get('title') if header_cell.span else '')

            for row in html.find('tbody').find_all('tr'):
                cells = row.find_all('td')

                try:
                    title = cells[labels.index('Filename')].span.get('title')
                    download_url = cells[labels.index('Torrent')].a.get('href')
                    if not all([title, download_url]):
                        continue

                    download_url = urljoin(self.url, download_url)

                    seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                    leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    raw_size = cells[labels.index('File size')].get_text()
                    size = convert_size(raw_size) or -1

                    pubdate = self.parse_pubdate(cells[labels.index('Age')].get('title'))

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #46
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:

            rows = html('item')
            if not rows:
                log.debug(
                    'No results returned from provider. Check chosen Torznab search categories '
                    'in provider settings.')
                return items

            for row in rows:
                try:
                    title = row.title.get_text(strip=True)
                    download_url = row.enclosure.get('url')
                    if not all([title, download_url]):
                        continue

                    seeders_attr = row.find('torznab:attr', attrs={'name': 'seeders'})
                    peers_attr = row.find('torznab:attr', attrs={'name': 'peers'})
                    # Missing seeders attribute defaults to 1 so the result
                    # is not discarded by the minseed filter below.
                    seeders = int(seeders_attr.get('value', 0)) if seeders_attr else 1
                    peers = int(peers_attr.get('value', 0)) if peers_attr else 0
                    # Torznab 'peers' counts seeders too; previously the raw
                    # peers value was reported as leechers, over-counting them.
                    leechers = max(peers - seeders, 0)

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    torrent_size = row.size.get_text(strip=True)
                    size = convert_size(torrent_size, default=-1)

                    pubdate_raw = row.pubdate.get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #47
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        # Sizes on this site use decimal suffixes
        units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

        def process_column_header(td):
            # Header cells may carry their label on an <img title="...">,
            # on the link text, or as plain cell text.
            if td.a and td.a.img:
                header = td.a.img.get('title', td.a.get_text(strip=True))
                if header:
                    return header
            return td.get_text(strip=True)

        items = []

        with BS4Parser(data, 'html5lib') as html:
            torrent_table = html.find('table', id='torrent_table')
            if torrent_table:
                torrent_rows = torrent_table('tr')
            else:
                torrent_rows = []

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers'
            labels = [process_column_header(cell) for cell in torrent_rows[0]('td')]

            # Skip column headers
            for row in torrent_rows[1:]:
                cells = row('td')
                if len(cells) < len(labels):
                    continue

                try:
                    name_cell = cells[labels.index('Name /Year')]
                    title = name_cell.find('a', dir='ltr').get_text(strip=True)
                    download_url = urljoin(self.url, name_cell.find('a', title='Download')['href'])
                    if not all([title, download_url]):
                        continue

                    seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                    leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    raw_size = cells[labels.index('Size')].get_text(strip=True)
                    size = convert_size(raw_size, units=units) or -1

                    pubdate = self.parse_pubdate(
                        cells[labels.index('Time')].find('span')['title'])

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider.')

        return items