Example #1
0
    def results(self, query, category=None, sort='date', pages_max=1,
            **kwargs):
        """Yield validated torrent results scraped from the search pages.

        The first page is obtained either by opening `query` directly (when
        it is a URL) or by submitting the search form; the following pages
        are reached through self._next().

        :param query: search terms, or a URL to open as-is
        :param category: optional category name; when CAT_DEF maps its
            lowercased form to a value, that value is sent as form field 't'
        :param sort: sort key passed to self._sort() after a form search
        :param pages_max: maximum number of result pages to process
        :param kwargs: forwarded to result.validate()
        :raises SearchError: 'no data' when self.url is unset, the page
            cannot be loaded or the result selector returns None;
            'overload' when an empty page matches RE_OVERLOAD
        """
        if not self.url:
            raise SearchError('no data')

        for page in range(1, pages_max + 1):
            if page > 1:
                # Stop paging as soon as there is no further page
                if not self._next(page):
                    break
            else:
                if is_url(query):
                    if not self.browser.open(query):
                        raise SearchError('no data')
                else:
                    fields = {'q': query}
                    if category:
                        val = CAT_DEF.get(category.lower())
                        if val:
                            fields['t'] = [val]
                    if not self.browser.submit_form(self.url, fields=fields):
                        raise SearchError('no data')
                    self._sort(sort)

            lis = self.browser.cssselect('#torrents li')
            if not lis:
                # None and an empty list are told apart: None is a failure,
                # an empty list is only an error on an overload page
                if lis is None:
                    raise SearchError('no data')
                elif RE_OVERLOAD.search(self.browser.tree.text_content()):
                    raise SearchError('overload')

            for el in lis:
                # Truncated markup of the item, only used in error messages
                log = html.tostring(el, pretty_print=True)[:1000]

                result = Result()
                result.type = 'torrent'
                result.safe = False

                links = el.cssselect('a')
                if not links:
                    logger.error('failed to get title from %s', log)
                    continue
                result.title = clean(html.tostring(links[0]))

                details = el.cssselect('.torInfo')
                if not details:
                    logger.error('failed to get details from %s', log)
                    continue
                res = RE_DETAILS.search(html.tostring(details[0]))
                if not res:
                    continue
                result.category = res.group(1).strip(' ').lower()

                date = res.group(3)
                result.date = self._get_date(date)
                if not result.date:
                    logger.error('failed to get date from "%s"', date)
                    continue

                seeds = details[0].cssselect('span.seeders')
                if seeds:
                    try:
                        result.seeds = int(seeds[0].text.replace(',', ''))
                    except (AttributeError, ValueError):
                        # Seeds are optional: a span without text (.text is
                        # None) or with a non-numeric value must not abort
                        # the whole search
                        pass

                tds = el.cssselect('tr td')
                if not tds:
                    logger.error('failed to get size from %s', log)
                    continue
                if not result.get_size(tds[0].text):
                    continue

                url_info = urljoin(self.url, links[0].get('href')).encode('utf-8')
                result.url = self._get_torrent_url(url_info)
                if not result.url:
                    logger.error('failed to get magnet url from %s', url_info)
                    continue
                if not result.get_hash():
                    continue

                if not result.validate(**kwargs):
                    continue

                yield result
Example #2
0
    def results(self, query, category=None, sort='date', pages_max=1,
            **kwargs):
        """Generate torrent results parsed from the 'div.results' blocks.

        :param query: search terms, or a URL that is opened directly
        :param category: when set, only results whose parsed category
            equals it are kept
        :param sort: sort key; self._sort() is only called when it differs
            from 'popularity'
        :param pages_max: maximum number of result pages to walk
        :param kwargs: forwarded to result.validate()
        :raises SearchError: 'no data' when self.url is unset, the first
            page cannot be loaded or the results selector returns None
        """
        if not self.url:
            raise SearchError('no data')

        for page_no in range(1, pages_max + 1):
            if page_no == 1:
                # First page: open the given url or run the search form
                if is_url(query):
                    opened = self.browser.open(query)
                    if not opened:
                        raise SearchError('no data')
                else:
                    submitted = self.browser.submit_form(self.url,
                            index=0, fields={'q': query})
                    if not submitted:
                        raise SearchError('no data')
                    # The site already sorts by peers ('popularity')
                    if sort != 'popularity':
                        self._sort(sort)
            elif not self._next(page_no):
                break

            blocks = self.browser.cssselect('div.results')
            if blocks is None:
                raise SearchError('no data')

            # Approximate matches end the whole search
            headings = self.browser.cssselect('div.results h3')
            if headings and RE_APPROXIMATE_MATCH.search(
                    html.tostring(headings[0])):
                break

            for block in blocks:
                # Sponsored blocks are ignored
                sponsor = block.cssselect('h2')
                if sponsor and RE_SPONSORED_LINK.search(
                        html.tostring(sponsor[0])):
                    continue

                for entry in block.cssselect('dl'):
                    anchors = entry.cssselect('a')
                    if not anchors:
                        continue
                    anchor = anchors[0]

                    # Truncated markup of the entry, for log messages only
                    snippet = html.tostring(entry, pretty_print=True)[:1000]

                    result = Result()
                    result.type = 'torrent'
                    result.safe = False

                    title = self.get_link_text(html.tostring(anchor))
                    if not title:
                        continue
                    result.title = clean(title)

                    # A category failure is logged but does not drop the
                    # entry by itself
                    try:
                        matched = RE_CATEGORIES.search(html.tostring(anchor))
                        result.category = self._get_category(matched.group(1))
                    except Exception:
                        logger.error('failed to get category info from %s', snippet)

                    if category and category != result.category:
                        continue

                    # 'pending' entries have neither date nor size
                    if entry.cssselect('span.pe'):
                        continue

                    try:
                        date_cell = entry.cssselect('.a')[0]
                        date_text = date_cell[0].get('title')
                        result.date = self._get_date(date_text)
                    except Exception:
                        logger.debug('failed to get date from %s', snippet)
                        continue

                    try:
                        size_text = entry.cssselect('.s')[0].text
                    except Exception:
                        logger.debug('failed to get size from %s', snippet)
                        continue
                    if not result.get_size(size_text):
                        continue

                    if not result.validate(**kwargs):
                        continue

                    # Seeds are best effort: a failure keeps the result
                    try:
                        seeds_text = entry.cssselect('.d')[0].text
                        result.seeds = int(seeds_text.replace(',', ''))
                    except Exception:
                        logger.debug('failed to get seeds from %s', snippet)

                    # Resolve the torrent url from the entry's info page
                    url_info = urljoin(self.url, anchor.get('href'))
                    result.url = self._get_torrent_url(query, url_info)
                    if result.url and result.get_hash():
                        yield result
Example #3
0
    def results(self, query, category=None, sort='date', pages_max=1,
            **kwargs):
        """Yield validated torrent results parsed from '#searchResult' rows.

        :param query: search terms, or a URL to open as-is
        :param category: optional category name; when CAT_DEF maps its
            lowercased form to a field name, that field is submitted with
            the value ['on'] (presumably a checkbox - confirm against the
            site form)
        :param sort: sort key passed to self._sort() after a form search
        :param pages_max: maximum number of result pages to process
        :param kwargs: forwarded to result.validate()
        :raises SearchError: 'no data' when self.url is unset, the page
            cannot be loaded or the row selector returns None;
            'overload' when an empty page matches RE_OVERLOAD
        """
        if not self.url:
            raise SearchError('no data')

        for page in range(1, pages_max + 1):
            if page > 1:
                # Stop paging as soon as there is no further page
                if not self._next(page):
                    break
            else:
                if is_url(query):
                    if not self.browser.open(query):
                        raise SearchError('no data')
                else:
                    fields = {'q': query}
                    if category:
                        val = CAT_DEF.get(category.lower())
                        if val:
                            fields[val] = ['on']
                    if not self.browser.submit_form(self.url, fields=fields):
                        raise SearchError('no data')
                    self._sort(sort)

            trs = self.browser.cssselect('#searchResult tr:not([class="header"])')
            if not trs:
                # None is a failure, an empty list is only an error on an
                # overload page
                if trs is None:
                    raise SearchError('no data')
                elif RE_OVERLOAD.search(self.browser.tree.text_content()):
                    raise SearchError('overload')

            for tr in trs:
                # Rows with fewer than 4 children are not result rows
                if len(tr) < 4:
                    continue

                # Truncated markup of the row, only used in error messages
                log = html.tostring(tr, pretty_print=True)[:1000]

                result = Result()
                result.type = 'torrent'
                result.safe = False

                # A missing category is logged but does not drop the row
                try:
                    result.category = tr[0].cssselect('a')[0].text.lower()
                except Exception:
                    logger.error('failed to get category from %s', log)

                res = tr.cssselect('div.detName a')
                if not res:
                    logger.error('failed to get title from %s', log)
                    continue
                result.title = res[0].text

                result.url = self._get_torrent_url(tr)
                if not result.url:
                    logger.error('failed to get magnet url from %s', log)
                    continue
                if not result.get_hash():
                    continue

                res = tr.cssselect('.detDesc')
                if not res:
                    logger.error('failed to get details from %s', log)
                    continue
                details = clean(html.tostring(res[0]))
                res_ = RE_DETAILS.search(details)
                if not res_:
                    logger.error('failed to parse details: %s', details)
                    continue
                date, size = res_.groups()
                if not result.get_size(size):
                    continue

                if not result.validate(**kwargs):
                    continue

                # 'as' form: valid on Python 2.6+ and Python 3, unlike the
                # comma form
                try:
                    result.date = self._get_date(date)
                except Exception as e:
                    logger.error('failed to get date from "%s": %s', date, str(e))
                    continue

                # Seeds are best effort: the result is kept without them
                try:
                    result.seeds = int(tr[2].text)
                except Exception:
                    pass
                yield result
Example #4
0
    def results(self, query, category=None, sort='date', pages_max=1,
            **kwargs):
        """Yield validated torrent results parsed from '.table-torrents' rows.

        :param query: search terms (sent as form field 'ihq'), or a URL to
            open as-is
        :param category: not used by this method; the category of each
            result is read from the row itself
        :param sort: sort key passed to self._sort() after a form search
        :param pages_max: maximum number of result pages to process
        :param kwargs: forwarded to result.validate()
        :raises SearchError: 'no data' when self.url is unset, the page
            cannot be loaded or the row selector returns None;
            'overload' when an empty page matches RE_OVERLOAD
        """
        if not self.url:
            raise SearchError('no data')

        for page in range(1, pages_max + 1):
            if page > 1:
                # Stop paging as soon as there is no further page
                if not self._next(page):
                    break
            else:
                if is_url(query):
                    if not self.browser.open(query):
                        raise SearchError('no data')
                else:
                    fields = {'ihq': query}
                    if not self.browser.submit_form(self.url,
                            fields=fields):
                        raise SearchError('no data')
                    self._sort(sort)

            trs = self.browser.cssselect('.table-torrents tr[data-key]')
            if not trs:
                # None is a failure, an empty list is only an error on an
                # overload page
                if trs is None:
                    raise SearchError('no data')
                elif RE_OVERLOAD.search(self.browser.tree.text_content()):
                    raise SearchError('overload')

            for tr in trs:
                # Truncated markup of the row, only used in error messages
                log = html.tostring(tr, pretty_print=True)[:1000]

                result = Result()
                result.type = 'torrent'
                result.safe = False

                # Kept in its own local so the `category` argument is not
                # overwritten by the scraped value
                category_name = None
                spans = tr.cssselect('.category-row span')
                if spans:
                    try:
                        category_name = spans[0].get('title').lower()
                    except Exception:
                        # e.g. no 'title' attribute: reported just below
                        category_name = None
                if not category_name:
                    # A missing category is logged but does not drop the row
                    logger.error('failed to get category from %s', log)
                else:
                    result.category = category_name

                links_ = tr.cssselect('.title-row a')
                if not links_:
                    logger.error('failed to get title link from %s', log)
                    continue
                try:
                    result.title = links_[0].cssselect('span')[0].text
                except Exception:
                    logger.error('failed to get title from %s', log)
                    continue

                url_info = urljoin(self.url, links_[0].get('href'))

                size_ = tr.cssselect('.size-row')
                if not size_:
                    logger.error('failed to get size from %s', log)
                    continue
                size = size_[0].text
                if not result.get_size(size):
                    logger.error('failed to get size from "%s"', size)
                    continue

                date_ = tr.cssselect('.date-row')
                if not date_:
                    logger.error('failed to get date from %s', log)
                    continue
                date = date_[0].text
                # 'as' form: valid on Python 2.6+ and Python 3, unlike the
                # comma form
                try:
                    result.date = self._get_date(date)
                except Exception as e:
                    logger.error('failed to get date from "%s": %s', date, str(e))
                    continue

                if not result.validate(**kwargs):
                    continue

                # The torrent url is only resolved for validated results
                result.url = self._get_torrent_url(url_info)
                if not result.url:
                    logger.error('failed to get magnet url from %s', url_info)
                    continue
                if not result.get_hash():
                    continue

                # Seeds are best effort: the result is kept without them
                try:
                    result.seeds = int(tr[-2].text)
                except Exception:
                    logger.error('failed to get seeds from %s', log)

                yield result