Example #1
    def get_size(self, val):
        '''Get the result size in MB.'''
        # delegate to the module-level get_size() helper shadowed by this method's name
        self.size = get_size(val)
        if self.size is not None:
            return True
        logger.error('failed to get result size from "%s"', val)
Example #2
    def results(self, query, sort='date', pages_max=1, **kwargs):
        sort = SORT_DEF[sort]    # map the friendly sort key onto the provider's sort value
        for page in range(1, pages_max + 1):
            data = self._send(query, page, sort)
            tree = etree.fromstring(data)
            try:
                results = int(tree.xpath('hasResults')[0].text)
            except (ValueError, TypeError, IndexError):
                # a missing or non-numeric count means the response is unusable
                raise SearchError('failed to get results count from "%s"' % data)
            if not results:
                return
            hits = int(tree.xpath('results/hitsForThisPage')[0].text)
            if not hits:
                return
            # skip hits that are missing a url, size or date instead of failing the whole page
            for res in tree.xpath('results/hits'):
                url = res.xpath('link')[0].text
                if not url:
                    logger.error('failed to get url from %s', data)
                    continue
                size = res.xpath('size')[0].text
                if not size:
                    logger.error('failed to get size from %s', data)
                    continue
                date = res.xpath('added')[0].text
                if not date:
                    logger.error('failed to get date from %s', data)
                    continue

                result = Result()
                result.auto = False
                result.type = 'filestube'
                result.title = clean(res.xpath('name')[0].text)
                result.url = url
                result.size = get_size(size)
                result.date = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
                if not result.validate(**kwargs):
                    continue
                yield result
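
Both results() generators populate a Result object and forward their extra keyword arguments to Result.validate(). The Result class itself is not part of these excerpts; the following is only a minimal sketch of the shape the code appears to rely on, and the validate() filters (size_min/size_max) are assumptions for illustration.

class Result(object):
    '''Hypothetical container matching the attributes the excerpts set.'''

    def __init__(self):
        self.auto = True
        self.type = None
        self.title = None
        self.url = None
        self.size = None    # in MB, as returned by get_size()
        self.date = None
        self.ref = None

    def validate(self, **kwargs):
        # Assumed filters: the excerpts only show that **kwargs reaches this
        # method, so the real checks may differ.
        size_min = kwargs.get('size_min')
        size_max = kwargs.get('size_max')
        if size_min is not None and (self.size is None or self.size < size_min):
            return False
        if size_max is not None and (self.size is None or self.size > size_max):
            return False
        return True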
Example #3
    def test_size(self):
        for val, expected in self.fixtures:
            res = get_size(val)
            self.assertEqual(res, expected)
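
The fixtures and the get_size() helper exercised by this test live outside the excerpt. Below is a minimal sketch of what they might look like, assuming get_size() parses a human-readable size string (as suggested by the docstring in Example #1, "Get the result size in MB") and returns the value in MB, or None on failure; the real implementation may handle more units and formats.

import re

_RE_HUMAN_SIZE = re.compile(r'^\s*([\d.]+)\s*([KMGT]?B)\s*$', re.I)
_UNITS_TO_MB = {
    'B': 1.0 / 1024 ** 2,
    'KB': 1.0 / 1024,
    'MB': 1.0,
    'GB': 1024.0,
    'TB': 1024.0 ** 2,
    }

def get_size(val):
    '''Hypothetical module-level helper: return the size in MB, or None.'''
    if not val:
        return None
    res = _RE_HUMAN_SIZE.search(str(val))
    if not res:
        return None
    number, unit = res.groups()
    try:
        return float(number) * _UNITS_TO_MB[unit.upper()]
    except (ValueError, KeyError):
        return None

# (val, expected) pairs shaped like the self.fixtures the test iterates over
fixtures = [
    ('700 MB', 700.0),
    ('1.5 GB', 1536.0),
    ('not a size', None),
    ]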
Example #4
    def results(self, query, sort='date', pages_max=1, **kwargs):
        if not self.url:
            raise SearchError('no data')

        url = None

        for i in range(pages_max):
            if i == 0:
                # first page: submit the search form
                if not self.browser.submit_form(url, fields={'q': query}):
                    raise SearchError('no data')
                    raise SearchError('no data')
            else:
                # subsequent pages: follow the '>' link in the last table on the page
                tables = self.browser.cssselect('table')
                if not tables:
                    continue
                links = tables[-1].cssselect('a')
                if not links:
                    break
                next_text = self.get_link_text(html.tostring(links[-1]))
                if next_text != '>':
                    break
                url = urljoin(self.url, links[-1].get('href'))
                if not self.browser.open(url):
                    raise SearchError('no data')

            for tr in self.browser.cssselect('table#r2 tr', []):
                if tr.cssselect('th'):
                    continue    # skip header rows

                # truncated row dump, used only in the error messages below
                log = html.tostring(tr, pretty_print=True)[:1000]

                result = Result()
                result.type = 'binsearch'

                titles = tr.cssselect('span.s')
                if not titles:
                    continue
                title = titles[0].text
                res = RE_TITLE.findall(title)
                if res:
                    title = res[0]
                result.title = clean(title)

                age = tr[-1].text
                if not age:
                    logger.error('failed to get age from %s', log)
                result.date = self._get_date(age)

                refs = tr.cssselect('input[type="checkbox"]')
                if not refs:
                    logger.error('failed to get references list from %s', log)
                    continue
                ref = refs[0].get('name')
                if not ref:
                    logger.error('failed to get reference from %s', log)
                    continue
                result.ref = ref

                info = tr.cssselect('span.d')
                if not info:
                    continue
                links = info[0].cssselect('a')
                if not links or not RE_COLLECTION.search(links[0].text):
                    continue    # only keep links that point at a collection
                result.url = urljoin(self.url, links[0].get('href'))

                info = clean(html.tostring(info[0]))
                if RE_PASSWORD.search(info):
                    continue    # skip password-protected posts

                res = RE_SIZE.search(info)
                if not res:
                    continue
                result.size = get_size(res.group(1))

                res = RE_PARTS.search(info)
                if not res or res.group(1) != res.group(2):
                    continue    # skip incomplete posts (available parts != total parts)

                if not result.validate(**kwargs):
                    continue
                yield result
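
For context, both results() methods are generators that yield already-validated Result objects and raise SearchError when the provider returns no usable data. A hedged usage sketch follows; the Binsearch class name and the size_min keyword are assumptions for illustration, not part of the excerpts.

# Hypothetical caller: consume the generator page by page and let validate()
# (fed via **kwargs) filter the hits.
plugin = Binsearch()
try:
    for result in plugin.results('ubuntu iso', sort='date', pages_max=2, size_min=100):
        logger.info('%s %s %s %s', result.title, result.size, result.date, result.url)
except SearchError as exc:
    logger.error('search failed: %s', exc)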