def results(self, query, category=None, sort='date', pages_max=1, **kwargs):
    """Yield torrent Result objects matching the query (generator).

    `query` may be a plain search string or a results-page URL that is
    opened directly. Walks up to `pages_max` pages and raises SearchError
    when the site returns no data or reports overload.
    """
    if not self.url:
        raise SearchError('no data')
    for page_num in range(1, pages_max + 1):
        if page_num == 1:
            # First page: open the URL directly or submit the search form.
            if is_url(query):
                if not self.browser.open(query):
                    raise SearchError('no data')
            else:
                form_fields = {'q': query}
                if category:
                    cat_val = CAT_DEF.get(category.lower())
                    if cat_val:
                        form_fields['t'] = [cat_val]
                if not self.browser.submit_form(self.url, fields=form_fields):
                    raise SearchError('no data')
            self._sort(sort)
        elif not self._next(page_num):
            # Pagination exhausted.
            break
        items = self.browser.cssselect('#torrents li')
        if not items:
            if items is None:
                raise SearchError('no data')
            if RE_OVERLOAD.search(self.browser.tree.text_content()):
                raise SearchError('overload')
        for item in items:
            # Truncated markup snippet, used only in log messages.
            snippet = html.tostring(item, pretty_print=True)[:1000]
            result = Result()
            result.type = 'torrent'
            result.safe = False
            anchors = item.cssselect('a')
            if not anchors:
                logger.error('failed to get title from %s', snippet)
                continue
            result.title = clean(html.tostring(anchors[0]))
            info_els = item.cssselect('.torInfo')
            if not info_els:
                logger.error('failed to get details from %s', snippet)
                continue
            info = RE_DETAILS.search(html.tostring(info_els[0]))
            if not info:
                continue
            result.category = info.group(1).strip(' ').lower()
            date_str = info.group(3)
            result.date = self._get_date(date_str)
            if not result.date:
                logger.error('failed to get date from "%s"', date_str)
                continue
            seed_els = info_els[0].cssselect('span.seeders')
            if seed_els:
                try:
                    result.seeds = int(seed_els[0].text.replace(',', ''))
                except ValueError:
                    # Seeds are optional; keep the result without them.
                    pass
            cells = item.cssselect('tr td')
            if not cells:
                logger.error('failed to get size from %s', snippet)
                continue
            if not result.get_size(cells[0].text):
                continue
            url_info = urljoin(self.url, anchors[0].get('href')).encode('utf-8')
            result.url = self._get_torrent_url(url_info)
            if not result.url:
                logger.error('failed to get magnet url from %s', url_info)
                continue
            if not result.get_hash():
                continue
            if not result.validate(**kwargs):
                continue
            yield result
def results(self, query, category=None, sort='date', pages_max=1, **kwargs):
    """Search the tracker and yield torrent Result objects (generator).

    :param query: search string, or a results-page URL to open directly
    :param category: optional category name looked up in CAT_DEF; the
        matching form checkbox is switched 'on'
    :param sort: sort key passed to self._sort()
    :param pages_max: maximum number of result pages to walk
    :param kwargs: extra filters forwarded to Result.validate()
    :raise SearchError: when the site returns no data or is overloaded
    """
    if not self.url:
        raise SearchError('no data')
    for page in range(1, pages_max + 1):
        if page > 1:
            # Pagination exhausted.
            if not self._next(page):
                break
        else:
            if is_url(query):
                if not self.browser.open(query):
                    raise SearchError('no data')
            else:
                fields = {'q': query}
                if category:
                    val = CAT_DEF.get(category.lower())
                    if val:
                        # Category is a named checkbox on this form.
                        fields[val] = ['on']
                if not self.browser.submit_form(self.url, fields=fields):
                    raise SearchError('no data')
            self._sort(sort)
        # Result rows, excluding the table header row.
        trs = self.browser.cssselect('#searchResult tr:not([class="header"])')
        if not trs:
            if trs is None:
                raise SearchError('no data')
            elif RE_OVERLOAD.search(self.browser.tree.text_content()):
                raise SearchError('overload')
        for tr in trs:
            # Skip filler rows that lack the expected cell count.
            if len(tr) < 4:
                continue
            # Truncated markup snippet, used only in log messages.
            log = html.tostring(tr, pretty_print=True)[:1000]
            result = Result()
            result.type = 'torrent'
            result.safe = False
            try:
                result.category = tr[0].cssselect('a')[0].text.lower()
            except Exception:
                # Category is optional; log and keep going.
                logger.error('failed to get category from %s', log)
            res = tr.cssselect('div.detName a')
            if not res:
                logger.error('failed to get title from %s', log)
                continue
            result.title = res[0].text
            result.url = self._get_torrent_url(tr)
            if not result.url:
                logger.error('failed to get magnet url from %s', log)
                continue
            if not result.get_hash():
                continue
            res = tr.cssselect('.detDesc')
            if not res:
                logger.error('failed to get details from %s', log)
                continue
            details = clean(html.tostring(res[0]))
            res_ = RE_DETAILS.search(details)
            if not res_:
                logger.error('failed to parse details: %s', details)
                continue
            date, size = res_.groups()
            if not result.get_size(size):
                continue
            if not result.validate(**kwargs):
                continue
            try:
                result.date = self._get_date(date)
            # FIX: was 'except Exception, e' (Python-2-only syntax, a
            # SyntaxError on Python 3); 'as e' is valid on 2.6+ and 3.x.
            except Exception as e:
                logger.error('failed to get date from "%s": %s', date, str(e))
                continue
            try:
                result.seeds = int(tr[2].text)
            except Exception:
                # Seeds are optional; keep the result without them.
                pass
            yield result
def results(self, query, category=None, sort='date', pages_max=1, **kwargs):
    """Search the tracker and yield torrent Result objects (generator).

    :param query: search string, or a results-page URL to open directly
    :param category: accepted for interface compatibility; this form has
        no category field, so it is not used to build the query
    :param sort: sort key passed to self._sort()
    :param pages_max: maximum number of result pages to walk
    :param kwargs: extra filters forwarded to Result.validate()
    :raise SearchError: when the site returns no data or is overloaded
    """
    if not self.url:
        raise SearchError('no data')
    for page in range(1, pages_max + 1):
        if page > 1:
            # Pagination exhausted.
            if not self._next(page):
                break
        else:
            if is_url(query):
                if not self.browser.open(query):
                    raise SearchError('no data')
            else:
                fields = {'ihq': query}
                if not self.browser.submit_form(self.url, fields=fields):
                    raise SearchError('no data')
            self._sort(sort)
        trs = self.browser.cssselect('.table-torrents tr[data-key]')
        if not trs:
            if trs is None:
                raise SearchError('no data')
            elif RE_OVERLOAD.search(self.browser.tree.text_content()):
                raise SearchError('overload')
        for tr in trs:
            # Truncated markup snippet, used only in log messages.
            log = html.tostring(tr, pretty_print=True)[:1000]
            result = Result()
            result.type = 'torrent'
            result.safe = False
            category_ = tr.cssselect('.category-row span')
            # FIX: local renamed from 'category' so it no longer clobbers
            # the parameter of the same name.
            if not category_:
                row_category = None
            else:
                try:
                    row_category = category_[0].get('title').lower()
                except Exception:
                    row_category = None
            if not row_category:
                # Category is optional; log and keep going.
                logger.error('failed to get category from %s', log)
            else:
                result.category = row_category
            links_ = tr.cssselect('.title-row a')
            if not links_:
                logger.error('failed to get title link from %s', log)
                continue
            try:
                result.title = links_[0].cssselect('span')[0].text
            except Exception:
                logger.error('failed to get title from %s', log)
                continue
            url_info = urljoin(self.url, links_[0].get('href'))
            size_ = tr.cssselect('.size-row')
            if not size_:
                logger.error('failed to get size from %s', log)
                continue
            size = size_[0].text
            if not result.get_size(size):
                logger.error('failed to get size from "%s"', size)
                continue
            date_ = tr.cssselect('.date-row')
            if not date_:
                # FIX: message previously said 'size' for a missing date row.
                logger.error('failed to get date from %s', log)
                continue
            date = date_[0].text
            try:
                result.date = self._get_date(date)
            # FIX: was 'except Exception, e' (Python-2-only syntax, a
            # SyntaxError on Python 3); 'as e' is valid on 2.6+ and 3.x.
            except Exception as e:
                logger.error('failed to get date from "%s": %s', date, str(e))
                continue
            if not result.validate(**kwargs):
                continue
            result.url = self._get_torrent_url(url_info)
            if not result.url:
                logger.error('failed to get magnet url from %s', url_info)
                continue
            if not result.get_hash():
                continue
            try:
                result.seeds = int(tr[-2].text)
            except Exception:
                # Seeds are optional; log but still yield the result.
                logger.error('failed to get seeds from %s', log)
            yield result
def results(self, query, category=None, sort='date', pages_max=1, **kwargs):
    """Search and yield torrent Result objects (generator).

    `query` may be a plain search string or a URL to open directly.
    Walks up to `pages_max` result pages; raises SearchError when the
    site returns no data. Results are filtered by `category` (if given)
    and by Result.validate(**kwargs).
    """
    if not self.url:
        raise SearchError('no data')
    for page in range(1, pages_max + 1):
        if page > 1:
            # Subsequent pages: stop when pagination runs out.
            if not self._next(page):
                break
        else:
            if is_url(query):
                if not self.browser.open(query):
                    raise SearchError('no data')
            else:
                if not self.browser.submit_form(self.url, index=0, fields={'q': query}):
                    raise SearchError('no data')
            if sort != 'popularity':    # default sort is peers ('popularity')
                self._sort(sort)
        divs = self.browser.cssselect('div.results')
        if divs is None:
            raise SearchError('no data')
        # Skip approximate matches
        res = self.browser.cssselect('div.results h3')
        if res and RE_APPROXIMATE_MATCH.search(html.tostring(res[0])):
            # NOTE: this breaks the page loop, so approximate matches and
            # all later pages are discarded entirely.
            break
        for div in divs:
            # Skip sponsored links
            res = div.cssselect('h2')
            if res and RE_SPONSORED_LINK.search(html.tostring(res[0])):
                continue
            for dl in div.cssselect('dl'):
                links = dl.cssselect('a')
                if not links:
                    continue
                # Truncated markup snippet, used only in log messages.
                log = html.tostring(dl, pretty_print=True)[:1000]
                result = Result()
                result.type = 'torrent'
                result.safe = False
                title = self.get_link_text(html.tostring(links[0]))
                if not title:
                    continue
                result.title = clean(title)
                try:
                    res = RE_CATEGORIES.search(html.tostring(links[0]))
                    result.category = self._get_category(res.group(1))
                except Exception:
                    # Category is optional; log and keep going.
                    logger.error('failed to get category info from %s', log)
                if category and category != result.category:
                    continue
                if dl.cssselect('span.pe'):
                    # skip 'pending' results (missing date and size)
                    continue
                try:
                    date = dl.cssselect('.a')[0][0].get('title')
                    result.date = self._get_date(date)
                except Exception:
                    logger.debug('failed to get date from %s', log)
                    continue
                try:
                    size = dl.cssselect('.s')[0].text
                except Exception:
                    logger.debug('failed to get size from %s', log)
                    continue
                if not result.get_size(size):
                    continue
                if not result.validate(**kwargs):
                    continue
                try:
                    seeds = dl.cssselect('.d')[0].text
                    result.seeds = int(seeds.replace(',', ''))
                except Exception:
                    # Seeds are optional; keep the result without them.
                    logger.debug('failed to get seeds from %s', log)
                # Find torrent url
                url_info = urljoin(self.url, links[0].get('href'))
                result.url = self._get_torrent_url(query, url_info)
                if not result.url:
                    continue
                if not result.get_hash():
                    continue
                yield result
def results(self, query, category=None, pages_max=1, **kwargs):
    """Yield rutracker Result objects matching the query (generator).

    Opens QUERY_URL with the encoded query, walks up to `pages_max`
    pages, and raises SearchError when the site returns no data or
    reports overload. `category` is accepted but not used to filter.
    """
    if not self.url:
        raise SearchError('no data')
    search_url = '%s?%s' % (QUERY_URL, urlencode({'nm': query}))
    for page_num in range(1, pages_max + 1):
        if page_num == 1:
            if not self.browser.open(search_url):
                raise SearchError('no data')
        elif not self._next(page_num):
            # Pagination exhausted.
            break
        rows = self.browser.cssselect('#tor-tbl tbody tr')
        if not rows:
            if rows is None:
                raise SearchError('no data')
            if RE_OVERLOAD.search(self.browser.tree.text_content()):
                raise SearchError('overload')
        for row in rows:
            # Placeholder rows carry a single cell; skip them.
            if len(row) == 1:
                continue
            # Truncated markup snippet, used only in log messages.
            row_html = html.tostring(row, pretty_print=True)[:1000]
            result = Result()
            result.type = 'rutracker'
            result.safe = False
            result.category = None
            title_links = row[3].cssselect('a')
            if not title_links:
                logger.error('failed to get title from %s', row_html)
                continue
            result.title = clean(html.tostring(title_links[0]))
            dl_links = row[5].cssselect('a')
            if not dl_links:
                logger.debug('failed to get torrent url from %s', html.tostring(row[5]))
                continue
            result.url = dl_links[0].get('href')
            size_text = clean(dl_links[0].text or '').replace('_', ' ').strip()
            if not result.get_size(size_text):
                continue
            seed_els = row[6].cssselect('.seedmed')
            if seed_els:
                try:
                    result.seeds = int(seed_els[0].text)
                except ValueError:
                    # Seeds are optional; keep the result without them.
                    pass
            stamp_els = row[9].cssselect('u')
            if not stamp_els:
                logger.error('failed to get date from %s', row_html)
                continue
            try:
                result.date = datetime.utcfromtimestamp(int(stamp_els[0].text))
            except ValueError:
                logger.error('failed to get date from %s', stamp_els[0].text)
                continue
            if not result.validate(**kwargs):
                continue
            yield result