def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as soup:
        torrent_table = soup.find('table', class_='listing')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Each release spans two consecutive rows (description row + stats row);
        # skip the header row when the first row looks like column headers.
        a = 1 if len(torrent_rows[0]('td')) < 2 else 0

        # Skip column headers
        for top, bot in zip(torrent_rows[a::2], torrent_rows[a + 1::2]):
            try:
                desc_top = top.find('td', class_='desc-top')
                title = desc_top.get_text(strip=True) if desc_top else None
                download_url = desc_top.find('a')['href'] if desc_top else None
                if not all([title, download_url]):
                    continue

                # Stats cell looks like 'S: 10 L: 2 C: 0 ID: 12345' once spaces removed.
                stats = bot.find('td', class_='stats').get_text(strip=True)
                sl = re.match(
                    r'S:(?P<seeders>\d+)L:(?P<leechers>\d+)C:(?:\d+)ID:(?:\d+)',
                    stats.replace(' ', ''))
                seeders = try_int(sl.group('seeders')) if sl else 0
                leechers = try_int(sl.group('leechers')) if sl else 0

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                desc_bottom = bot.find('td', class_='desc-bot').get_text(strip=True)
                # str.strip('Size: ') strips a *character set*, not a prefix, and
                # could eat legitimate leading/trailing size characters.
                # Remove the literal 'Size:' prefix instead.
                raw_size = desc_bottom.split('|')[1].strip()
                if raw_size.startswith('Size:'):
                    raw_size = raw_size[len('Size:'):].strip()
                size = convert_size(raw_size) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    def process_column_header(td):
        # Prefer the header image's title, then the cell text, then the anchor title.
        result = ''
        if td.a and td.a.img:
            result = td.a.img.get('title', td.a.get_text(strip=True))
        if not result:
            result = td.get_text(strip=True)
        if not result and td.a and td.a.get('title'):
            result = td.a['title']
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', class_='torrent_table')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Need to only search one level deep for 'td' tags, as one of the td's also has a td.
        labels = [
            process_column_header(label)
            for label in torrent_rows[0].find_all('td', recursive=False)
        ]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row.find_all('td', recursive=False)
            if len(cells) < len(labels):
                continue

            try:
                # Skip if torrent has been nuked due to poor quality
                if row.find('img', alt='Nuked'):
                    continue

                title = cells[labels.index('Name')].find(
                    'a', class_='overlay_torrent').get_text(strip=True)
                download_url = urljoin(
                    self.url, cells[labels.index('Name')].find('a')['href'])
                if not all([title, download_url]):
                    continue

                seeders = int(cells[labels.index('Seeders')].get_text(
                    strip=True).replace(',', ''))
                leechers = int(cells[labels.index('Leechers')].get_text(
                    strip=True).replace(',', ''))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Use binary units throughout; the original list mixed decimal
                # 'TB'/'PB' into a binary 'KIB'/'MIB'/'GIB' sequence, mis-sizing
                # very large torrents.
                units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']
                torrent_size = cells[labels.index('Size')].get_text(strip=True)
                size = convert_size(torrent_size, units=units) or -1

                # NOTE(review): hard-coded column index — presumably the added/time
                # column; verify against the site's current table layout.
                pubdate_raw = cells[3].find('span')['title']
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        # Continue only if at least one release is found
        if html.find('h2', text='No .torrents fit this filter criteria'):
            log.debug('Data returned from provider does not contain any torrents')
            return items

        table = html.find('table', attrs={'style': 'border: none; width: 100%;'})
        result_rows = table('tr', class_='browse') if table else []

        for result in result_rows:
            columns = result('td')
            try:
                title = columns[1].find('a').get('title')
                torrent_url = columns[2].find('a').get('href')
                download_url = urljoin(self.url, torrent_url)
                if not all([title, torrent_url]):
                    continue

                # Default seeders to 1 when the cell is not a number.
                seeders = try_int(columns[9].get_text(), 1)
                leechers = try_int(columns[10].get_text())

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Normalize the raw size text before conversion.
                normalized_size = self._norm_size(columns[7].get_text(strip=True))
                size = convert_size(normalized_size) or -1

                pubdate = self.parse_pubdate(columns[5].get_text(), human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', {'id': 'torrent_table'})

        # Continue only if at least one release is found
        if not torrent_table:
            log.debug('Data returned from provider does not contain any {0}torrents',
                      'ranked ' if self.ranked else '')
            return items

        torrent_body = torrent_table.find('tbody')
        torrent_rows = torrent_body.contents
        # Drop the spacer rows interleaved with the torrent rows.
        del torrent_rows[1::2]

        for row in torrent_rows[1:]:
            try:
                torrent = row('td')
                if len(torrent) <= 1:
                    break

                all_as = (torrent[1])('a')

                notinternal = row.find('img', src='/static//common/user_upload.png')
                if self.ranked and notinternal:
                    log.debug('Found a user uploaded release, Ignoring it..')
                    continue

                freeleech = row.find('img', src='/static//common/browse/freeleech.png')
                if self.freeleech and not freeleech:
                    continue

                title = all_as[2].string
                download_url = urljoin(self.url, all_as[0].attrs['href'])
                if not all([title, download_url]):
                    continue

                seeders = try_int((row('td')[6]).text.replace(',', ''))
                leechers = try_int((row('td')[7]).text.replace(',', ''))

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                # Default size to -1 so it is always bound; previously an empty
                # size cell left `size` undefined and raised an uncaught NameError
                # (NameError is not in the except tuple below).
                size = -1
                torrent_size = row.find('td', class_='nobr').find_next_sibling('td').string
                if torrent_size:
                    size = convert_size(torrent_size) or -1

                pubdate_raw = row.find('td', class_='nobr').find('span')['title']
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # Consistent with sibling parsers: log with stack trace.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html.parser') as html:
        torrent_table = html.find('div', class_='browse')
        torrent_rows = torrent_table(
            'div', class_=re.compile('^line')) if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 1:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for row in torrent_rows:
            try:
                # Title cell holds 'hebrew/english' (or 'hebrew\english');
                # keep the part after the separator, skip rows without one.
                heb_eng_title = row.find('div', class_='bTitle').find(
                    href=re.compile(r'details\.php')).find('b').get_text()
                if '/' in heb_eng_title:
                    title = heb_eng_title.split('/')[1].strip()
                elif '\\' in heb_eng_title:
                    title = heb_eng_title.split('\\')[1].strip()
                else:
                    continue

                download_id = row.find('div', class_='bTitle').find(
                    href=re.compile(r'download\.php'))['href']
                if not all([title, download_id]):
                    continue

                download_url = urljoin(self.url, download_id)

                seeders = try_int(row.find('div', class_='bUping').get_text(strip=True))
                leechers = try_int(row.find('div', class_='bDowning').get_text(strip=True))

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Skip the leading label text before the numeric size.
                torrent_size = row.find('div', class_='bSize').get_text(strip=True)
                size = convert_size(torrent_size[5:], sep='') or -1

                pubdate_raw = row.find('div', class_=re.compile(
                    'bHow')).find_all('span')[1].next_sibling.strip()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_rows = html.find_all(class_=['release-info', 'release-links'])

        # Continue only if at least one release is found
        if not torrent_rows:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Rows alternate between a 'release-info' row (carries the date) and a
        # 'release-links' row (carries title/links). Initialize pubdate up front
        # so a leading 'release-links' row cannot raise an uncaught NameError.
        pubdate = None

        for row in torrent_rows:
            try:
                if row['class'] == ['release-info']:
                    pubdate = None

                    # pubdate is only supported for non-daily searches
                    if mode != 'RSS':
                        # keep the date and strip the rest
                        pubdate_raw = row.find('td', class_='rls-label').get_text()[1:9]
                        pubdate = self.parse_pubdate(
                            pubdate_raw, timezone='America/Los_Angeles')
                    continue

                title = row.find('td', class_='dl-label').get_text()
                magnet = row.find('td', class_='dl-type hs-magnet-link')
                download_url = magnet or row.find('td', class_='dl-type hs-torrent-link')
                if not all([title, download_url]):
                    continue

                download_url = download_url.span.a.get('href')

                # Add HorribleSubs group to the title
                title = '{group} {title}'.format(group='[HorribleSubs]', title=title)

                # HorribleSubs doesn't provide this information
                seeders = 1
                leechers = 0
                size = -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_rows = html.find_all('tr')

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Scenetime apparently uses different number of cells in #torrenttable based
        # on who you are. This works around that by extracting labels from the first
        # <tr> and using their index to find the correct download/seeders/leechers td.
        labels = [label.get_text(strip=True) or label.img['title']
                  for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                link = cells[labels.index('Name')].find('a')
                torrent_id = link['href'].replace('details.php?id=', '').split('&')[0]
                title = link.get_text(strip=True)
                download_url = self.urls['download'].format(
                    torrent_id, '{0}.torrent'.format(title.replace(' ', '.')))
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text()
                # Insert a space between the number and the unit so convert_size
                # can parse values like '1.2GB'.
                torrent_size = re.sub(r'(\d+\.?\d*)', r'\1 ', torrent_size)
                size = convert_size(torrent_size) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # Consistent with sibling parsers: log with stack trace.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode, **kwargs):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS.

    :return: A list of items found
    """
    items = []
    keywords = kwargs.pop('keywords', None)

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find(id='sortabletable')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        labels = [
            label.img['title'] if label.img else label.get_text(strip=True)
            for label in torrent_rows[0]('td')
        ]

        for torrent in torrent_rows[1:]:
            try:
                # Freeleech releases are marked with a specific alt text.
                if self.freeleech and not torrent.find(
                        'img', alt=re.compile("TORRENT GRATUIT : Seulement "
                                              "l'upload sera compter.")):
                    continue

                title = torrent.find(class_='tooltip-content').div.get_text(strip=True)
                download_url = torrent.find(title='Télécharger le torrent!').parent['href']
                if not all([title, download_url]):
                    continue

                # Chop off tracker/channel prefix or we cannot parse the result!
                if mode != 'RSS' and keywords:
                    show_name_first_word = re.search(r'^[^ .]+', keywords).group()
                    if not title.startswith(show_name_first_word):
                        title = re.sub(
                            r'.*(' + show_name_first_word + '.*)', r'\1', title)

                seeders = try_int(torrent.find(title='Seeders').get_text(strip=True))
                leechers = try_int(torrent.find(title='Leechers').get_text(strip=True))

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                torrent_size = torrent('td')[labels.index('Taille')].get_text(strip=True)
                size = convert_size(torrent_size) or -1

                pubdate_raw = torrent('td')[labels.index('Nom')].find_all(
                    'div')[-1].get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw, dayfirst=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # Consistent with sibling parsers: log with stack trace.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    # Use html.parser, since html5parser has issues with this site.
    with BS4Parser(data, 'html.parser') as html:
        # Get the last table with a width of 800px.
        tables = html('table', width='800')
        torrent_table = tables[-1] if tables else []
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < 3:
                # We must have cells[2] because it contains the title
                continue

            # Freeleech rows are highlighted with a bgcolor attribute.
            if self.freeleech and not row.get('bgcolor'):
                continue

            try:
                title = cells[2].find('a')['title'] if cells[2] else None
                download_url = urljoin(
                    self.url, cells[0].find('a')['href']) if cells[0] else None
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[8].get_text(strip=True)) if len(cells) > 8 else 1
                leechers = try_int(cells[9].get_text(strip=True)) if len(cells) > 9 else 0

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                torrent_size = cells[6].get_text(' ') if len(cells) > 6 else None
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[5].get_text(' ')
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # Consistent with sibling parsers: log with stack trace.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Units
    units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', class_='mainblockcontenttt')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Guard the row count before probing torrent_rows[2]; previously a page
        # with fewer than three rows raised an uncaught IndexError here.
        # The third row carries a 'lista' cell when there are no results.
        if len(torrent_rows) < 3 or torrent_rows[2].find('td', class_='lista'):
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Cat., Active, Filename, Dl, Wl, Added, Size, Uploader, S, L, C
        labels = [label.a.get_text(strip=True) if label.a else label.get_text(strip=True)
                  for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            try:
                cells = row.findChildren('td')[:len(labels)]
                if len(cells) < len(labels):
                    continue

                title = cells[labels.index('Filename')].a
                title = title.get_text(strip=True) if title else None
                link = cells[labels.index('Dl')].a
                link = link.get('href') if link else None
                download_url = urljoin(self.url, link) if link else None
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('S')].get_text(strip=True))
                leechers = try_int(cells[labels.index('L')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text()
                size = convert_size(torrent_size, units=units) or -1

                pubdate_raw = cells[labels.index('Added')].get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        result_list = html.find('ul', class_='search-ret-list')

        # Continue only if at least one release is found
        if not result_list:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for entry in result_list.find_all('li', class_='search-ret-item'):
            try:
                title = entry.find('h2').find('a').get('title')
                download_url = entry.find('div').find('a').get('href') + self._custom_trackers
                if not all([title, download_url]):
                    continue

                detail_spans = entry.find('div').find_all('span')

                # The provider does not expose peer counts.
                seeders = leechers = 0

                size = convert_size(detail_spans[0].get_text(), default=-1)
                pubdate = self.parse_pubdate(detail_spans[2].get_text())

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', {'id': 'torrent_table'})

        # Continue only if at least one release is found
        if not torrent_table:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        torrent_rows = torrent_table('tr', {'class': 'torrent'})

        # Continue only if one Release is found
        if not torrent_rows:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        for torrent_row in torrent_rows:
            try:
                is_freeleech = torrent_row.find('img', alt='Freeleech') is not None
                if self.freeleech and not is_freeleech:
                    continue

                # Any of these anchor titles marks a downloadable torrent.
                download_anchor = torrent_row.find('a', {
                    'title': [
                        'Download Torrent',  # Download link
                        'Previously Grabbed Torrent File',  # Already Downloaded
                        'Currently Seeding Torrent',  # Seeding
                        'Currently Leeching Torrent',  # Leeching
                    ]
                })
                if not download_anchor:
                    continue

                download_url = urljoin(self.url, download_anchor['href'])

                title_anchor = torrent_row.find('a', {'data-src': True})
                title = title_anchor['data-src']
                if not all([title, download_url]):
                    continue

                cells = torrent_row('td')
                seeders = try_int(cells[5].text.strip())
                leechers = try_int(cells[6].text.strip())

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(cells[2].find('div').get_text(strip=True)) or -1
                pubdate = self.parse_pubdate(cells[3].find('span')['title'])

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_rows = html('item')
        for row in torrent_rows:
            try:
                # Only keep video releases.
                if row.category and 'video' not in row.category.get_text(strip=True).lower():
                    continue

                title_raw = row.title.text
                # Add "-" after codec and add missing "."
                title = re.sub(r'([xh][ .]?264|xvid)( )', r'\1-',
                               title_raw).replace(' ', '.') if title_raw else ''

                # Build a magnet link from the info hash at the end of the guid URL.
                info_hash = row.guid.text.rsplit('/', 1)[-1]
                download_url = 'magnet:?xt=urn:btih:' + info_hash + '&dn=' + title + self._custom_trackers
                if not all([title, download_url]):
                    continue

                torrent_size, seeders, leechers = self._split_description(
                    row.find('description').text)
                size = convert_size(torrent_size) or -1

                pubdate_raw = row.pubdate.get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                # Filter unseeded torrent.
                # Was `min(self.minseed, 1)`, which made the setting ineffective.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items