def test_iter_get(self):
    """Check ``download.iter_get`` against mocked HTTP responses.

    First pass: every fixture URL except the last is registered with the
    mock, all requests are fetched, and the set of URLs that came back as
    ``HtmlDocument`` must equal the registered URLs.

    Second pass: the last URL is registered too and fetched on its own.
    """
    pages = list(iter_html_docs('cmdextract'))
    page_requests = [download.Request(page.url.url) for page in pages]

    with requests_mock.mock() as mocker:
        # Deliberately leave the last request unregistered for the first pass.
        for request, page in zip(page_requests[:-1], pages[:-1]):
            mocker.get(request.url, content=page.body)

        results = list(download.iter_get(page_requests))

        fetched = []
        for result in results:
            if isinstance(result, download.HtmlDocument):
                fetched.append(result.url.url.strip('/'))

        # A decoding failure is re-raised so it surfaces as a test error
        # rather than being hidden inside a DownloadError result.
        for result in results:
            decode_failed = (
                isinstance(result, download.DownloadError)
                and isinstance(result.err, UnicodeDecodeError)
            )
            if decode_failed:
                raise result.err

        self.assertEqual(
            set(fetched),
            {request.url for request in page_requests[:-1]},
        )

        # Second pass: register the remaining URL and fetch it alone.
        # Note that a single Request (not a list) is handed to iter_get here.
        mocker.get(page_requests[-1].url, content=pages[-1].body)
        results = list(download.iter_get(page_requests[-1]))
        fetched_urls = [result.url.url.strip('/') for result in results]
        self.assertEqual(fetched_urls, [page_requests[-1].url])
def search(query_string=None, cmd=None, search_engine='google',
           max_download=5):
    """Run a web search and extract commands from the top hits.

    Args:
        query_string: Passed to the engine's ``get_search_request``.
        cmd: Forwarded to ``extract_commands`` as ``base_commands``.
        search_engine: Name handed to ``get_engine`` to select the engine.
        max_download: Upper bound on how many hits are requested; the hit
            list is sliced to this length.

    Returns:
        Whatever ``extract_commands`` returns for the fetched documents.

    Raises:
        SearchError: If fetching the search request yields a
            ``DownloadError``.
    """
    backend = get_engine(search_engine)
    response = get(backend.get_search_request(query_string))

    if isinstance(response, DownloadError):
        message = 'Failed search on {} ({})'.format(
            search_engine, response.status_code)
        raise SearchError(message)

    hits = backend.get_hits(response)
    hit_requests = [Request(hit.url) for hit in hits[:max_download]]
    return extract_commands(iter_get(hit_requests), base_commands=cmd)
def test_iter_get(self):
    """Verify ``download.iter_get`` for a batch and for a single request.

    The batch run mocks all fixture URLs but the last and expects exactly
    those URLs to come back as ``HtmlDocument`` results.  The single run
    mocks the last URL as well and fetches only that request.
    """
    fixtures = list(iter_html_docs('cmdextract'))
    wanted = [download.Request(fixture.url.url) for fixture in fixtures]

    with requests_mock.mock() as http:
        # Only the leading requests get a mocked response at this point.
        for pending, fixture in zip(wanted[:-1], fixtures[:-1]):
            http.get(pending.url, content=fixture.body)

        outcomes = list(download.iter_get(wanted))

        ok_urls = []
        for outcome in outcomes:
            if isinstance(outcome, download.HtmlDocument):
                ok_urls.append(outcome.url.url.strip('/'))

        # Surface decoding problems directly instead of letting them hide
        # inside a DownloadError result.
        for outcome in outcomes:
            if not isinstance(outcome, download.DownloadError):
                continue
            if isinstance(outcome.err, UnicodeDecodeError):
                raise outcome.err

        self.assertEqual(
            set(ok_urls),
            {pending.url for pending in wanted[:-1]},
        )

        # Now mock the final URL too and fetch it by itself; iter_get is
        # given the bare Request here rather than a list.
        http.get(wanted[-1].url, content=fixtures[-1].body)
        outcomes = list(download.iter_get(wanted[-1]))
        self.assertEqual(
            [outcome.url.url.strip('/') for outcome in outcomes],
            [wanted[-1].url],
        )