def iter_torrents(self):
    """Yield one partially-filled ``Torrent`` per ``div.list_tor`` element.

    For every matching element the id, name, size, seeders and leechers are
    scraped from the listing markup.  ``url``, ``magnet``, ``description``
    and ``files`` are set to ``NotLoaded``; ``filename`` is set to the id.

    A seeders/leechers counter that cannot be converted to an integer
    (non-numeric text, or no text at all) is reported as ``0``.
    """
    for entry in self.parser.select(self.document.getroot(), 'div.list_tor'):
        stats = self.parser.select(entry, 'div.list_tor_right', 1)

        # int(None) raises TypeError, not ValueError: an element without
        # text must be treated like any other unparseable counter.
        try:
            seeders = int(self.parser.select(stats, 'b.green', 1).text)
        except (TypeError, ValueError):
            seeders = 0
        try:
            leechers = int(self.parser.select(stats, 'b.red', 1).text)
        except (TypeError, ValueError):
            leechers = 0

        # The size lives in the first <span> of the first <p>: the number is
        # the first token after the first ':' and the unit is the last token.
        size_paragraph = self.parser.select(stats, 'p')[0]
        size_span = self.parser.select(size_paragraph, 'span')[0]
        size_text = size_span.text_content()
        amount = float(size_text.split(':')[1].split()[0])
        unit = size_text.split()[-1].upper()
        size = get_bytes_size(amount, unit)

        # The torrent id is the link target without surrounding slashes and
        # without everything from '.html' onwards.
        link = self.parser.select(entry, 'a.list_tor_title', 1)
        href = link.attrib.get('href', '')
        name = unicode(link.text_content())
        torrent_id = unicode(href.strip('/').split('.html')[0])

        torrent = Torrent(torrent_id, name)
        torrent.url = NotLoaded
        torrent.filename = torrent_id
        torrent.magnet = NotLoaded
        torrent.size = size
        torrent.seeders = seeders
        torrent.leechers = leechers
        torrent.description = NotLoaded
        torrent.files = NotLoaded
        yield torrent
def get_torrent(self, id):
    """Build a ``Torrent`` from the details page held in ``self.document``.

    :param id: identifier handed unchanged to the ``Torrent`` constructor.
    :returns: a ``Torrent`` carrying url, size, seeders, leechers,
        description and file names; ``filename`` is only set when the url
        is truthy.
    """
    seed = 0
    leech = 0
    description = NotAvailable
    url = NotAvailable

    for block in self.document.getiterator('div'):
        if block.attrib.get('id', '') == 'desc':
            try:
                description = block.text_content().strip()
            except UnicodeDecodeError:
                description = 'Description with invalid UTF-8.'
            continue

        css_class = block.attrib.get('class', '')
        if css_class == 'seedBlock':
            # The count is the text of the block's second child; a missing
            # text counts as zero.
            count = block.getchildren()[1].text
            seed = int(count) if count is not None else 0
        elif css_class == 'leechBlock':
            count = block.getchildren()[1].text
            leech = int(count) if count is not None else 0

    title = self.parser.select(
        self.document.getroot(), 'h1.torrentName span', 1).text

    # The last link whose title mentions both phrases wins.
    for link in self.document.getiterator('a'):
        link_title = link.attrib.get('title', '')
        if 'Download' in link_title and 'torrent file' in link_title:
            url = link.attrib.get('href', '')

    size = 0
    u = ''
    for span in self.document.getiterator('span'):
        # Other spans can carry these classes too; only one with more than
        # two children is taken to be the size-bearing span.
        if span.attrib.get('class', '') not in ('folder', 'folderopen'):
            continue
        kids = span.getchildren()
        if len(kids) <= 2:
            continue
        raw_size = kids[1].tail
        u = kids[2].text
        size = float(raw_size.split(': ')[1].replace(',', '.'))

    files = [cell.text
             for cell in self.document.getiterator('td')
             if cell.attrib.get('class', '') == 'torFileName']

    torrent = Torrent(id, title)
    torrent.url = url
    if torrent.url:
        query = urlsplit(url).query
        torrent.filename = parse_qs(query).get('title', [None])[0]
    torrent.size = get_bytes_size(size, u)
    torrent.seeders = int(seed)
    torrent.leechers = int(leech)
    torrent.description = description
    torrent.files = files
    return torrent
def get_torrent(self, id):
    """Build a ``Torrent`` from the ``table.torrent_info_tbl`` rows.

    Rows are addressed by fixed position: row 2 holds the download link,
    row 3 the name, row 5 the size ("<value> <unit>"), row 6 the age
    (three whitespace-separated tokens) and rows 15 onwards one file each.

    :param id: requested torrent id; used as the ``Torrent`` id when the
        download link is not a magnet URI (otherwise the info hash taken
        from the magnet link is used).
    :returns: the populated ``Torrent``; seeders, leechers, description and
        filename are ``NotAvailable``.
    """
    trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')

    # Defaults for a download link that is not a magnet URI.  Without them
    # the names below would be unbound and the function would die with an
    # UnboundLocalError when building the Torrent.
    magnet = NotAvailable
    ih = id

    href = trs[2].cssselect('td a')[0].attrib['href']
    if href.startswith('magnet:'):
        magnet = unicode(href)
        query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
        btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
        ih = btih.split(':')[-1]

    name = unicode(trs[3].cssselect('td')[1].text)
    value, unit = trs[5].cssselect('td')[1].text.split()

    # The second token is used as the timedelta keyword, so it has to be a
    # name timedelta accepts (e.g. 'days', 'hours'); the third is ignored.
    valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
    delta = timedelta(**{valueunit: float(valueago)})
    date = datetime.now() - delta

    files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]

    torrent = Torrent(ih, name)
    torrent.url = unicode(self.url)
    torrent.size = get_bytes_size(float(value), unit)
    torrent.magnet = magnet
    torrent.seeders = NotAvailable
    torrent.leechers = NotAvailable
    torrent.description = NotAvailable
    torrent.files = files
    torrent.filename = NotAvailable
    torrent.date = date
    return torrent
def iter_torrents(self):
    """Yield a ``Torrent`` for every result row of the document.

    A row is a ``<tr>`` whose class is exactly ``'odd'`` or ``' even'`` and
    which has an ``id`` attribute.  Title, id, size, seeders and leechers
    are read from fixed child positions of the row.

    ``url`` and ``filename`` are only assigned when the row's first cell
    contains a link whose href includes ``'.torrent'``; otherwise both keep
    whatever ``Torrent`` initialises them to.
    """
    for tr in self.document.getiterator('tr'):
        # NOTE(review): the leading space in ' even' is kept exactly as it
        # was; presumably it mirrors the page markup -- confirm.
        if tr.attrib.get('class', '') not in ('odd', ' even'):
            continue
        if 'id' not in tr.attrib:
            continue

        cells = tr.getchildren()
        title_link = cells[0].getchildren()[1].getchildren()[1]

        # The title is the link's own text followed by the text of each of
        # its child elements.
        title = title_link.text or ''
        for red in title_link.getchildren():
            title += red.text_content()
        idt = (title_link.attrib.get('href', '')
               .replace('/', '')
               .replace('.html', ''))

        # Reset for every row: otherwise a row without a .torrent link would
        # silently reuse the previous row's URL (or hit an unbound name on
        # the very first row).
        url = None
        for a in cells[0].getiterator('a'):
            if '.torrent' in a.attrib.get('href', ''):
                url = a.attrib['href']

        size = float(cells[1].text.replace(',', '.'))
        u = cells[1].getchildren()[0].text
        seed = cells[4].text
        leech = cells[5].text

        torrent = Torrent(idt, title)
        if url is not None:
            torrent.url = url
            torrent.filename = parse_qs(
                urlsplit(url).query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        yield torrent
def get_torrent(self, id):
    """Assemble a ``Torrent`` out of the details page in ``self.document``.

    :param id: identifier passed straight to the ``Torrent`` constructor.
    :returns: a ``Torrent`` with url, size, seeders, leechers, description
        and the list of file names; ``filename`` is filled in only if the
        url is truthy.
    """
    seed = 0
    leech = 0
    description = NotAvailable
    url = NotAvailable

    for section in self.document.getiterator('div'):
        if section.attrib.get('id', '') == 'desc':
            try:
                description = section.text_content().strip()
            except UnicodeDecodeError:
                description = 'Description with invalid UTF-8.'
            continue

        kind = section.attrib.get('class', '')
        if kind == 'seedBlock':
            # Second child holds the number; no text means zero.
            text = section.getchildren()[1].text
            seed = 0 if text is None else int(text)
        elif kind == 'leechBlock':
            text = section.getchildren()[1].text
            leech = 0 if text is None else int(text)

    heading = self.parser.select(
        self.document.getroot(), 'h1.torrentName span', 1)
    title = heading.text

    # Keep the href of the last anchor whose title contains both phrases.
    for anchor in self.document.getiterator('a'):
        anchor_title = anchor.attrib.get('title', '')
        if 'Download' in anchor_title and 'torrent file' in anchor_title:
            url = anchor.attrib.get('href', '')

    size = 0
    u = ''
    for span in self.document.getiterator('span'):
        # Several spans may use these classes; the child count is what
        # singles out the one that carries the size.
        is_folder = span.attrib.get('class', '') in ('folder', 'folderopen')
        children = span.getchildren()
        if is_folder and len(children) > 2:
            size_label = children[1].tail
            u = children[2].text
            size = float(size_label.split(': ')[1].replace(',', '.'))

    files = [td.text
             for td in self.document.getiterator('td')
             if td.attrib.get('class', '') == 'torFileName']

    torrent = Torrent(id, title)
    torrent.url = url
    if torrent.url:
        params = parse_qs(urlsplit(url).query)
        torrent.filename = params.get('title', [None])[0]
    torrent.size = get_bytes_size(size, u)
    torrent.seeders = int(seed)
    torrent.leechers = int(leech)
    torrent.description = description
    torrent.files = files
    return torrent
def iter_torrents(self):
    """Yield a ``Torrent`` for every result row of the document.

    A row is a ``<tr>`` whose class is exactly ``'odd'`` or ``' even'`` and
    which has an ``id`` attribute.  Title, id, size, seeders and leechers
    are read from fixed child positions of the row; the magnet link and the
    download URL come from the anchors under ``div.iaconbox``.

    ``description`` and ``files`` are ``NotLoaded``.  ``url``, ``magnet``
    and ``filename`` are ``NotAvailable`` when the row does not provide
    them.
    """
    for tr in self.document.getiterator('tr'):
        if tr.attrib.get('class', '') not in ('odd', ' even'):
            continue
        if 'id' not in tr.attrib:
            continue

        magnet = NotAvailable
        url = NotAvailable

        cells = tr.getchildren()
        title_link = cells[0].getchildren()[1].getchildren()[1]

        # The title is the link's own text followed by the text of each of
        # its child elements.
        title = unicode(title_link.text) if title_link.text else u''
        for red in title_link.getchildren():
            title += red.text_content()
        idt = (title_link.attrib.get('href', '')
               .replace('/', '')
               .replace('.html', ''))

        # Classify each action link by the prefix of its href.
        for a in self.parser.select(tr, 'div.iaconbox a'):
            href = a.attrib.get('href', '')
            if href.startswith('magnet'):
                magnet = unicode(href)
            elif href.startswith('http'):
                url = unicode(href)
            elif href.startswith('//'):
                # Protocol-relative link: make it absolute.
                url = u'https:%s' % href

        size = float(cells[1].text.replace(',', '.'))
        u = cells[1].getchildren()[0].text
        seed = cells[4].text
        leech = cells[5].text

        # Only derive a filename from a real URL that actually has a 'title'
        # query parameter.  Converting a missing value with unicode() would
        # store the literal string u'None', and the NotAvailable placeholder
        # is not a URL to be parsed.
        filename = NotAvailable
        if url is not NotAvailable:
            title_values = parse_qs(urlsplit(url).query).get('title')
            if title_values:
                filename = unicode(title_values[0])

        torrent = Torrent(idt, title)
        torrent.url = url
        torrent.magnet = magnet
        torrent.description = NotLoaded
        torrent.files = NotLoaded
        torrent.filename = filename
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        yield torrent
def get_torrent(self):
    """Build a ``Torrent`` from the details page held in ``self.document``.

    The id is the last path component of the browser's current URL, cut at
    the first ``'.html'``.  Title, seeders/leechers, download URL, magnet
    link, file list, size and description are read from
    ``div#middle_content`` and its ``div#tabs`` child.

    :returns: the populated ``Torrent``; ``filename`` is set to the id.
    """
    seed = 0
    leech = 0
    description = NotAvailable

    torrent_id = unicode(
        self.browser.geturl().split('.html')[0].split('/')[-1])

    root = self.document.getroot()
    content = self.parser.select(root, 'div#middle_content', 1)
    heading = self.parser.select(root, 'div#middle_content > h1', 1)
    title = u'%s' % heading.text

    # Counters are only trusted when both <b> elements are present.
    counters = self.parser.select(content, 'div.sl_block b')
    if len(counters) >= 2:
        seed, leech = counters[0].text, counters[1].text

    href_t = self.parser.select(content, 'a.down', 1).attrib.get('href', '')
    url = u'http://%s%s' % (self.browser.DOMAIN, href_t)
    magnet = unicode(
        self.parser.select(content, 'a.magnet', 1).attrib.get('href', ''))

    tabs = self.parser.select(content, 'div#tabs', 1)
    left_blocks = self.parser.select(
        tabs, 'div.body > div.doubleblock > div.leftblock')
    files = []
    if len(left_blocks) == 0:
        size_text = self.parser.select(tabs, 'h5', 1).text_content()
    else:
        # NOTE(review): the selection result itself is passed back to
        # select(), exactly as before -- confirm select() accepts it.
        size_text = self.parser.select(left_blocks, 'h5', 1).text
        for bold in self.parser.select(left_blocks, 'b'):
            # Each file entry is the full text of the <b>'s parent.
            files.append(bold.getparent().text_content())

    # Keep what is between the first '(' and the following ')', then take
    # the part after the first comma as "<number><unit>".
    size_text = size_text.split('(')[1].split(')')[0].strip()
    amount = size_text.split(',')[1]
    size = float(amount.strip(string.letters))
    u = (amount.strip()
         .translate(None, string.digits)
         .strip('.')
         .strip()
         .upper())

    desc_nodes = self.parser.select(tabs, 'div#descriptionContent')
    if len(desc_nodes) > 0:
        description = unicode(desc_nodes[0].text_content())

    torrent = Torrent(torrent_id, title)
    torrent.url = url
    torrent.filename = torrent_id
    torrent.magnet = magnet
    torrent.size = get_bytes_size(size, u)
    torrent.seeders = int(seed)
    torrent.leechers = int(leech)
    torrent.description = description
    torrent.files = files
    return torrent
def iter_torrents(self):
    """Yield a ``Torrent`` for every result row of the document.

    A row is a ``<tr>`` whose class is exactly ``'odd'`` or ``' even'`` and
    which has an ``id`` attribute.  Title, id, size, seeders and leechers
    are read from fixed child positions of the row; the magnet link and the
    download URL come from the anchors under ``div.iaconbox``.

    ``description`` and ``files`` are ``NotLoaded``.  ``url``, ``magnet``
    and ``filename`` are ``NotAvailable`` when the row does not provide
    them.
    """
    for tr in self.document.getiterator('tr'):
        row_class = tr.attrib.get('class', '')
        if row_class != 'odd' and row_class != ' even':
            continue
        if 'id' not in tr.attrib:
            continue

        magnet = NotAvailable
        url = NotAvailable

        columns = tr.getchildren()
        name_link = columns[0].getchildren()[1].getchildren()[1]

        # Link text first, then the text of every child element of the link.
        title = unicode(name_link.text) if name_link.text else u''
        for red in name_link.getchildren():
            title += red.text_content()
        idt = (name_link.attrib.get('href', '')
               .replace('/', '')
               .replace('.html', ''))

        # Sort the action links into magnet / download URL by href prefix.
        for a in self.parser.select(tr, 'div.iaconbox a'):
            href = a.attrib.get('href', '')
            if href.startswith('magnet'):
                magnet = unicode(href)
            elif href.startswith('http'):
                url = unicode(href)
            elif href.startswith('//'):
                # Protocol-relative link: make it absolute.
                url = u'https:%s' % href

        size = float(columns[1].text.replace(',', '.'))
        u = columns[1].getchildren()[0].text
        seed = columns[4].text
        leech = columns[5].text

        # unicode(None) would store the literal string u'None' as filename,
        # and the NotAvailable placeholder is not a URL to be parsed; fall
        # back to NotAvailable in both situations.
        filename = NotAvailable
        if url is not NotAvailable:
            title_values = parse_qs(urlsplit(url).query).get('title')
            if title_values:
                filename = unicode(title_values[0])

        torrent = Torrent(idt, title)
        torrent.url = url
        torrent.magnet = magnet
        torrent.description = NotLoaded
        torrent.files = NotLoaded
        torrent.filename = filename
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        yield torrent
def get_torrent(self, id):
    """Build a ``Torrent`` from the ``table.torrent_info_tbl`` rows.

    Rows are addressed by fixed position: row 2 holds the download link,
    row 3 the name, row 5 the size ("<value> <unit>"), row 6 the age
    (three whitespace-separated tokens) and rows 15 onwards one file each.

    :param id: requested torrent id; used as the ``Torrent`` id when the
        download link is not a magnet URI (otherwise the info hash taken
        from the magnet link is used).
    :returns: the populated ``Torrent``; seeders, leechers, description and
        filename are ``NotAvailable``.
    """
    trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')

    # Bind both names up front: they used to be assigned only inside the
    # magnet branch, so any other kind of link ended in UnboundLocalError.
    magnet = NotAvailable
    ih = id

    link_target = trs[2].cssselect('td a')[0].attrib['href']
    if link_target.startswith('magnet:'):
        magnet = unicode(link_target)
        query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
        btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
        ih = btih.split(':')[-1]

    name = unicode(trs[3].cssselect('td')[1].text)
    value, unit = trs[5].cssselect('td')[1].text.split()

    # The unit token doubles as the timedelta keyword argument, so it must
    # be a name timedelta accepts (e.g. 'days'); the last token is dropped.
    valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
    date = datetime.now() - timedelta(**{valueunit: float(valueago)})

    files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]

    torrent = Torrent(ih, name)
    torrent.url = unicode(self.url)
    torrent.size = get_bytes_size(float(value), unit)
    torrent.magnet = magnet
    torrent.seeders = NotAvailable
    torrent.leechers = NotAvailable
    torrent.description = NotAvailable
    torrent.files = files
    torrent.filename = NotAvailable
    torrent.date = date
    return torrent
def iter_torrents(self):
    """Yield a ``Torrent`` for every result row of the document.

    A row is a ``<tr>`` whose class is exactly ``'odd'`` or ``' even'`` and
    which has an ``id`` attribute.  Title, id, size, seeders and leechers
    are read from fixed child positions of the row.

    ``url`` and ``filename`` are only assigned when the row's first cell
    contains a link whose href includes ``'.torrent'``; otherwise both keep
    whatever ``Torrent`` initialises them to.
    """
    for tr in self.document.getiterator('tr'):
        row_class = tr.attrib.get('class', '')
        if row_class != 'odd' and row_class != ' even':
            continue
        if 'id' not in tr.attrib:
            continue

        columns = tr.getchildren()
        name_link = columns[0].getchildren()[1].getchildren()[1]

        # Link text first, then the text of every child element of the link.
        title = name_link.text or ''
        for red in name_link.getchildren():
            title += red.text_content()
        idt = (name_link.attrib.get('href', '')
               .replace('/', '')
               .replace('.html', ''))

        # Start from "no URL" on each row so that a row lacking a .torrent
        # link can neither inherit the previous row's URL nor trip over an
        # unbound name on the first row.
        url = None
        for a in columns[0].getiterator('a'):
            if '.torrent' in a.attrib.get('href', ''):
                url = a.attrib['href']

        size = float(columns[1].text.replace(',', '.'))
        u = columns[1].getchildren()[0].text
        seed = columns[4].text
        leech = columns[5].text

        torrent = Torrent(idt, title)
        if url is not None:
            torrent.url = url
            torrent.filename = parse_qs(
                urlsplit(url).query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        yield torrent
def get_torrent(self, id):
    """Build a ``Torrent`` from the details page held in ``self.document``.

    :param id: identifier handed unchanged to the ``Torrent`` constructor.
    :returns: a ``Torrent`` carrying url, magnet, size, seeders, leechers,
        description and file names.  ``filename`` is only set when the url
        is truthy, and an empty description becomes ``NotAvailable``.
    """
    seed = 0
    leech = 0
    description = NotAvailable
    url = NotAvailable
    magnet = NotAvailable

    for block in self.document.getiterator('div'):
        if block.attrib.get('id', '') == 'desc':
            try:
                description = unicode(block.text_content().strip())
            except UnicodeDecodeError:
                description = 'Description with invalid UTF-8.'
            continue

        css_class = block.attrib.get('class', '')
        if css_class == 'seedBlock':
            # The count is the text of the second child; no text means 0.
            count = block.getchildren()[1].text
            seed = int(count) if count is not None else 0
        elif css_class == 'leechBlock':
            count = block.getchildren()[1].text
            leech = int(count) if count is not None else 0

    heading = self.parser.select(
        self.document.getroot(), 'h1.novertmarg span', 1)
    title = unicode(heading.text)

    # Sort the download buttons into magnet / URL by the href prefix.
    buttons = self.parser.select(
        self.document.getroot(), 'div.downloadButtonGroup a')
    for button in buttons:
        href = button.attrib.get('href', '')
        if href.startswith('magnet'):
            magnet = unicode(href)
        elif href.startswith('//'):
            # Protocol-relative link: make it absolute.
            url = u'https:%s' % href
        elif href.startswith('http'):
            url = unicode(href)

    size = 0
    u = ''
    for span in self.document.getiterator('span'):
        # Other spans can carry these classes too; only one with more than
        # two children is taken to be the size-bearing span.
        if span.attrib.get('class', '') not in ['folder', 'folderopen']:
            continue
        kids = span.getchildren()
        if len(kids) <= 2:
            continue
        raw_size = kids[1].tail
        u = kids[2].text
        size = float(raw_size.split(': ')[1].replace(',', '.'))

    files = []
    for cell in self.document.getiterator('td'):
        if cell.attrib.get('class', '') == 'torFileName':
            files.append(cell.text)

    torrent = Torrent(id, title)
    torrent.url = url
    if torrent.url:
        query = urlsplit(url).query
        torrent.filename = parse_qs(query).get('title', [None])[0]
    torrent.magnet = magnet
    torrent.size = get_bytes_size(size, u)
    torrent.seeders = int(seed)
    torrent.leechers = int(leech)
    if description == '':
        description = NotAvailable
    torrent.description = description
    torrent.files = files
    return torrent