def results(self, query, sort='date', pages_max=1, **kwargs):
    """Yield validated filestube results for *query*.

    For each page from 1 to ``pages_max`` the response returned by
    ``self._send`` is parsed as XML.  Iteration stops as soon as a page
    reports no results or no hits.  Hits lacking a link, size, name
    element, or a parseable ``added`` timestamp are logged and skipped
    rather than aborting the whole search.

    :param query: search terms, passed through to ``self._send``.
    :param sort: key looked up in ``SORT_DEF`` to get the value sent
        with the request.
    :param pages_max: maximum number of pages to request.
    :param kwargs: forwarded unchanged to ``Result.validate``.
    :raises SearchError: if the results count or the per-page hits
        count is missing or is not an integer.
    """

    def _count(tree, path, label, data):
        # Missing element (IndexError), empty element whose text is None
        # (TypeError) and non-numeric text (ValueError) all mean the
        # response is unusable.
        try:
            return int(tree.xpath(path)[0].text)
        except (ValueError, TypeError, IndexError):
            raise SearchError(
                'failed to get %s count from "%s"' % (label, data))

    def _text(node, path):
        # Text of the first match, or None when the element is absent.
        found = node.xpath(path)
        return found[0].text if found else None

    sort = SORT_DEF[sort]
    for page in range(1, pages_max + 1):
        data = self._send(query, page, sort)
        tree = etree.fromstring(data)

        if not _count(tree, 'hasResults', 'results', data):
            return
        if not _count(tree, 'results/hitsForThisPage', 'hits', data):
            return

        for hit in tree.xpath('results/hits'):
            url = _text(hit, 'link')
            if not url:
                logger.error('failed to get url from %s', data)
                continue

            size = _text(hit, 'size')
            if not size:
                logger.error('failed to get size from %s', data)
                continue

            date = _text(hit, 'added')
            if not date:
                logger.error('failed to get date from %s', data)
                continue

            names = hit.xpath('name')
            if not names:
                logger.error('failed to get title from %s', data)
                continue

            # A malformed timestamp should only cost us this hit.
            try:
                added = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logger.error('failed to parse date "%s" from %s', date, data)
                continue

            result = Result()
            result.auto = False
            result.type = 'filestube'
            result.title = clean(names[0].text)
            result.url = url
            result.size = get_size(size)
            result.date = added
            if not result.validate(**kwargs):
                continue
            yield result
def results(self, query, sort='date', pages_max=1, **kwargs):
    """Yield validated binsearch results for *query*, page by page.

    The first page is obtained by submitting the search form through
    ``self.browser``.  Each following page is reached through the last
    link of the last table on the current page, and only when that
    link's text is ``'>'``.  Every row of ``table#r2`` that is not a
    header row is turned into a ``Result``; rows missing a title, age,
    reference, collection link or size, rows matching ``RE_PASSWORD``,
    and rows whose two ``RE_PARTS`` groups differ are skipped.

    :param query: search terms submitted in the ``q`` form field.
    :param sort: accepted for interface compatibility; not used here.
    :param pages_max: maximum number of result pages to walk.
    :param kwargs: forwarded unchanged to ``Result.validate``.
    :raises SearchError: if ``self.url`` is not set, or if submitting
        the form or opening a following page reports failure.
    """
    if not self.url:
        raise SearchError('no data')

    url = None
    for page_index in range(pages_max):
        if page_index == 0:
            # NOTE(review): the form is submitted with url=None; presumably
            # the browser then uses its current page -- confirm.
            if not self.browser.submit_form(url, fields={'q': query}):
                raise SearchError('no data')
        else:
            tables = self.browser.cssselect('table')
            if not tables:
                # The page cannot change without following a link, so
                # retrying on later iterations would find nothing either.
                break
            links = tables[-1].cssselect('a')
            if not links:
                break
            next_text = self.get_link_text(html.tostring(links[-1]))
            if next_text != '>':
                break
            url = urljoin(self.url, links[-1].get('href'))
            if not self.browser.open(url):
                raise SearchError('no data')

        for tr in self.browser.cssselect('table#r2 tr', []):
            # Header rows carry no result data.
            if tr.cssselect('th'):
                continue

            # Truncated row markup, used only in error messages.
            log = html.tostring(tr, pretty_print=True)[:1000]

            result = Result()
            result.type = 'binsearch'

            titles = tr.cssselect('span.s')
            if not titles:
                continue
            title = titles[0].text
            if not title:
                # .text is None when the span has no leading text; the
                # regex below would raise TypeError on it.
                logger.error('failed to get title from %s', log)
                continue
            matches = RE_TITLE.findall(title)
            if matches:
                title = matches[0]
            result.title = clean(title)

            age = tr[-1].text
            if not age:
                logger.error('failed to get age from %s', log)
                # Skip the row like every other parse failure instead of
                # handing an empty value to _get_date.
                continue
            result.date = self._get_date(age)

            refs = tr.cssselect('input[type="checkbox"]')
            if not refs:
                logger.error('failed to get references list from %s', log)
                continue
            ref = refs[0].get('name')
            if not ref:
                logger.error('failed to get reference from %s', log)
                continue
            result.ref = ref

            info = tr.cssselect('span.d')
            if not info:
                continue
            links = info[0].cssselect('a')
            if not links or not RE_COLLECTION.search(links[0].text):
                continue
            result.url = urljoin(self.url, links[0].get('href'))

            info = clean(html.tostring(info[0]))
            if RE_PASSWORD.search(info):
                continue
            size_match = RE_SIZE.search(info)
            if not size_match:
                continue
            result.size = get_size(size_match.group(1))

            # Presumably group(1)/group(2) are available/total parts, so a
            # mismatch means an incomplete post -- verify against RE_PARTS.
            parts_match = RE_PARTS.search(info)
            if not parts_match or parts_match.group(1) != parts_match.group(2):
                continue

            if not result.validate(**kwargs):
                continue
            yield result