Example #1
0
def analyse_sentiment_yahoo(word=''):
	"""Return Yahoo! Japan realtime-search sentiment data for *word*.

	Drives a headless PhantomJS browser to submit a realtime search, then
	extracts the ``YAHOO.JP.srch.rt.sentiment = {...}`` JSON blob embedded
	in the result page's inline <script> tags.

	Returns:
		dict: parsed sentiment data on success.
		False: if the search input box cannot be located.
		None: if no sentiment JSON is present in the page.
	"""
	# Custom User-Agent: the site answers 403 to the default PhantomJS UA.
	user_agent = 'Mozilla/5.0'
	phantomjs_path = '/usr/local/bin/phantomjs'
	# Fix: the capability value must be the UA *string*; the original passed
	# the whole requests-style header dict {'User-Agent': ...} here, so the
	# UA override never took effect.
	driver = webdriver.PhantomJS(
		executable_path=phantomjs_path,
		service_log_path=os.path.devnull,
		desired_capabilities={'phantomjs.page.settings.userAgent': user_agent})
	try:
		driver.get("http://realtime.search.yahoo.co.jp/realtime")
		try:
			elem = driver.find_element_by_name('p')
		except Exception:  # was a bare except; don't mask KeyboardInterrupt/SystemExit
			return False
		elem.clear()
		elem.send_keys(word)
		elem.send_keys(Keys.RETURN)
		time.sleep(1)  # crude wait for the results page to render
		html = driver.page_source.encode('utf-8')  # more sophisticated methods may be available
		soup = bs4.BeautifulSoup(html, 'lxml')
		scripts = soup.findAll('script')
		script_text = ''.join(s.get_text() for s in scripts)
		# The sentiment payload is assigned to a JS global on a single line;
		# dots are escaped so the pattern matches only the literal name.
		match = re.search(
			r'YAHOO\.JP\.srch\.rt\.sentiment = (?P<json>.+)', script_text, re.M)
		if match:
			senti_json = match.group('json')
			if senti_json:
				return json.loads(senti_json)
	finally:
		driver.quit()  # fix: the original leaked the PhantomJS process on every call
Example #2
0
def search_wiki(word='クロマニョン人'):
	"""Return a short summary (up to 8 sentences) of *word* from Japanese Wikipedia.

	Fetches ``https://ja.wikipedia.org/wiki/<word>``, concatenates the page's
	<p> paragraphs, strips residual tags, boilerplate maintenance notices and
	footnote markers like ``[1]``, and returns the first eight sentences
	(split on the Japanese full stop ``。``).

	Returns:
		str: the summary, or a Japanese "no match found" message on any error.
	"""
	try:
		encoded_word = urllib.parse.quote_plus(word, encoding="utf-8")
		wiki_url = ''.join(["https://ja.wikipedia.org/wiki/", encoded_word])
		soup = get_bs4soup(wiki_url)  # project helper; presumably returns a BeautifulSoup — confirm
		paragraphs = soup.findAll("p")
		text = ''.join(p.get_text() for p in paragraphs)
		# Fix: non-greedy patterns (raw strings). The original greedy
		# '\<.+\>' / '\[.+\]' wiped everything between the FIRST opener and
		# the LAST closer, destroying legitimate text whenever a span
		# contained more than one tag or footnote marker.
		text = re.sub(r'<.+?>', '', text)
		text = text.replace('この記事には複数の問題があります。改善やノートページでの議論にご協力ください。', '').replace('■カテゴリ / ■テンプレート', '')
		sentences = [re.sub(r'\[.+?\]', '', s) for s in text.split('。')]
		ans = ''.join(['。'.join(sentences[:8]), '。']).replace('。。', '。')
		return ans
	except Exception as e:
		d(e)  # project debug logger
		return ''.join(['\'', word, '\'に一致する語は見つかりませんでした。'])