def search(query_string=None, cmd=None, search_engine='google', max_download=5):
    """Run a web search and extract commands from the top hits.

    Args:
        query_string: Text handed to the engine's ``get_search_request``.
        cmd: Forwarded to ``extract_commands`` as ``base_commands``.
        search_engine: Name used to look up the engine via ``get_engine``.
        max_download: Upper bound on how many hits are requested; applied
            as a slice over the hit list.

    Returns:
        Whatever ``extract_commands`` returns for the fetched documents.

    Raises:
        SearchError: If fetching the search page yields a ``DownloadError``.
    """
    engine = get_engine(search_engine)
    response = get(engine.get_search_request(query_string))

    # A failed fetch is signalled by the returned object's type, so it is
    # checked explicitly here.
    if isinstance(response, DownloadError):
        message = 'Failed search on {} ({})'.format(
            search_engine, response.status_code)
        raise SearchError(message)

    hits = engine.get_hits(response)
    page_requests = [Request(hit.url) for hit in hits[:max_download]]
    return extract_commands(iter_get(page_requests), base_commands=cmd)
def test_extract_commands(self):
    """Check extract_commands on the HTML fixtures and on a bodiless document."""
    # No base command: the extracted keys must equal MERGED_COMMANDS.
    merged = extract_commands(iter_html_docs(TEST_DATA_DIR))
    self.assertEqual(set(merged.commands.keys()), MERGED_COMMANDS)

    # Base command 'xargs' over the whole fixture directory.
    xargs_all = extract_commands(iter_html_docs(TEST_DATA_DIR), 'xargs')
    expected_xargs = {
        'find /tmp -name "*.tmp" | xargs rm',
        u'find ./music -name "*.mp3" -print0 | xargs -0 ls',
        'find . -name "*.sh" | xargs grep "ksh"',
        'find /tmp -name "*.tmp" -print0 | xargs -0 rm',
        'find . -name "*.sh" -print0 | xargs -0 -I {} mv {} ~/back.scripts',
        u'find ./work -print | xargs grep "profit"',
    }
    self.assertEqual(set(xargs_all.commands.keys()), expected_xargs)

    # The stackoverflow.com fixture is expected to yield no 'xargs' commands.
    stackoverflow_doc = get_html_doc(TEST_DATA_DIR, 'stackoverflow.com')
    xargs_single = extract_commands(stackoverflow_doc, 'xargs')
    self.assertEqual(xargs_single.commands, {})

    # A document whose body is None must not be counted.
    bodiless = HtmlDocument('http://stackoverflow.com', b'')
    bodiless.body = None
    no_body_result = extract_commands(bodiless)
    self.assertEqual(no_body_result.nr_docs, 0)