def analyse_sentiment_yahoo(word=''):
    """Fetch sentiment data for *word* from Yahoo! Japan realtime search.

    Drives a headless PhantomJS browser (NOTE(review): PhantomJS support is
    deprecated in recent Selenium releases — consider headless Chrome/Firefox),
    submits *word* through the search form, then extracts the
    ``YAHOO.JP.srch.rt.sentiment`` JSON blob embedded in an inline <script>
    tag of the result page.

    Args:
        word: query string for the realtime search.

    Returns:
        dict parsed from the embedded sentiment JSON on success;
        ``False`` if the search input field cannot be located;
        ``None`` (implicitly) if the page contains no sentiment block.
    """
    USER_AGENT = {'User-Agent': 'Mozilla/5.0'}  # needed to prevent 403 error
    phantomjs_path = '/usr/local/bin/phantomjs'
    driver = webdriver.PhantomJS(
        executable_path=phantomjs_path,
        service_log_path=os.path.devnull,
        desired_capabilities={'phantomjs.page.settings.userAgent': USER_AGENT})
    try:
        driver.get("http://realtime.search.yahoo.co.jp/realtime")
        try:
            elem = driver.find_element_by_name('p')
        except Exception:
            # Was a bare ``except:`` — narrowed so SystemExit/KeyboardInterrupt
            # are not swallowed. Missing search box -> give up.
            return False
        elem.clear()
        elem.send_keys(word)
        elem.send_keys(Keys.RETURN)
        time.sleep(1)  # crude wait for results; more sophisticated methods may be available
        html = driver.page_source.encode('utf-8')
        soup = bs4.BeautifulSoup(html, 'lxml')
        ptext = soup.findAll('script')
        pstr = ''.join([p.get_text() for p in ptext])
        # The page inlines the sentiment data as a JS assignment; capture the RHS.
        reg = 'YAHOO.JP.srch.rt.sentiment = (?P<json>.+)'
        compiled_reg = re.compile(reg, re.M)
        reg_ls = compiled_reg.search(pstr)
        if reg_ls:
            reg_ls_json = reg_ls.groupdict()
            senti_json = reg_ls_json['json']
            if senti_json:
                sentiment_dic = json.loads(senti_json)
                return sentiment_dic
    finally:
        # Fix: the original leaked the PhantomJS process on every call
        # (including the early-return path) — always shut the driver down.
        driver.quit()
def search_wiki(word='クロマニョン人'):
    """Return a short plain-text summary of *word* from Japanese Wikipedia.

    Fetches the article page, concatenates all <p> paragraphs, strips
    markup remnants and boilerplate notices, and returns the first eight
    sentences (split on the Japanese full stop '。').

    Args:
        word: article title to look up (default: 'クロマニョン人').

    Returns:
        The summary string, or a Japanese "no match found" message if
        anything goes wrong (network error, missing page, parse failure).
    """
    ans = ''
    try:
        converted_word = urllib.parse.quote_plus(word, encoding="utf-8")
        wiki_url = ''.join(["https://ja.wikipedia.org/wiki/", converted_word])
        soup = get_bs4soup(wiki_url)
        ptext = soup.findAll("p")
        pstr = ''.join([p.get_text() for p in ptext])
        # Fix: raw strings — '\<' / '\[' are invalid escape sequences in
        # normal strings (DeprecationWarning, and a SyntaxWarning on newer
        # Pythons). Patterns themselves are unchanged.
        ans = re.sub(re.compile(r'\<.+\>'), '', pstr)
        ans = ans.replace('この記事には複数の問題があります。改善やノートページでの議論にご協力ください。', '').replace('■カテゴリ / ■テンプレート', '')
        # Fix: hoist the bracket-stripping pattern out of the comprehension —
        # the original recompiled it once per sentence.
        bracket_re = re.compile(r'\[.+\]')
        anslist = [bracket_re.sub('', s) for s in ans.split('。')]
        # Keep the first 8 sentences and normalise any doubled full stop.
        ans = ''.join(['。'.join(anslist[:8]), '。']).replace('。。', '。')
        return ans
    except Exception as e:
        d(e)  # project-local debug logger — best-effort reporting, then fall back
        return ''.join(['\'', word, '\'に一致する語は見つかりませんでした。'])