def test_find_pages(self):
    """Exercise page filtering, action-word counting and count sorting.

    Three pages are handed to a ``SearchEngine`` as ``material_pages``.
    After ``find_pages_including_related_words`` the test expects all
    three to appear in ``result_pages`` in the same order; after
    ``count_action_words`` / ``sort_action_words_count`` it expects the
    counts for "販売" (2) and "売却" (1), sorted with the larger first.
    """
    def build_page(url, title, snippet):
        # Create a WebPage and fill in the two text fields set by this test.
        page = WebPage(url)
        page.title = title
        page.snippet = snippet
        return page

    expected_pages = (
        build_page(
            "http://tradein.nissan.co.jp/",
            "自動車の下取りと売却",
            "自動車には下取りをする方法がけっこうある。",
        ),
        build_page(
            "http://www.link-nexus.com/",
            "自動車の下取りと販売",
            "あばばばばば",
        ),
        build_page(
            "http://toyota.jp/service/tradein/dc/top",
            "下取り参考価格情報",
            "下取りと販売ですよプロデューサーさん",
        ),
    )

    engine = SearchEngine()
    # Give the engine its own list so the expected tuple stays untouched.
    engine.material_pages = list(expected_pages)
    engine.hint_word = "自動車"
    engine.action_word = "下取り"

    engine.find_pages_including_related_words()
    for position, expected in enumerate(expected_pages):
        self.assertEqual(engine.result_pages[position], expected)

    engine.count_action_words()
    expected_counts = {"販売": 2, "売却": 1}
    self.assertEqual(engine.action_words_count, expected_counts)

    engine.sort_action_words_count()
    expected_sorted = [
        {"word": "販売", "count": 2},
        {"word": "売却", "count": 1},
    ]
    self.assertEqual(engine.sorted_action_words, expected_sorted)
def test_find_pages(self):
    """Exercise page filtering, action-word counting and count sorting.

    Three pages are handed to a ``SearchEngine`` as ``material_pages``.
    After ``find_pages_including_related_words`` the test expects all
    three to appear in ``result_pages`` in the same order; after
    ``count_action_words`` / ``sort_action_words_count`` it expects the
    counts for '販売' (2) and '売却' (1), sorted with the larger first.
    """
    def build_page(url, title, snippet):
        # Create a WebPage and fill in the two text fields set by this test.
        page = WebPage(url)
        page.title = title
        page.snippet = snippet
        return page

    expected_pages = (
        build_page(
            'http://tradein.nissan.co.jp/',
            '自動車の下取りと売却',
            '自動車には下取りをする方法がけっこうある。',
        ),
        build_page(
            'http://www.link-nexus.com/',
            '自動車の下取りと販売',
            'あばばばばば',
        ),
        build_page(
            'http://toyota.jp/service/tradein/dc/top',
            '下取り参考価格情報',
            '下取りと販売ですよプロデューサーさん',
        ),
    )

    engine = SearchEngine()
    # Give the engine its own list so the expected tuple stays untouched.
    engine.material_pages = list(expected_pages)
    engine.hint_word = '自動車'
    engine.action_word = '下取り'

    engine.find_pages_including_related_words()
    for position, expected in enumerate(expected_pages):
        self.assertEqual(engine.result_pages[position], expected)

    engine.count_action_words()
    expected_counts = {'販売': 2, '売却': 1}
    self.assertEqual(engine.action_words_count, expected_counts)

    engine.sort_action_words_count()
    expected_sorted = [
        {'word': '販売', 'count': 2},
        {'word': '売却', 'count': 1},
    ]
    self.assertEqual(engine.sorted_action_words, expected_sorted)
def test_find_related_action_words_from_clueweb(self):
    """Expect exactly one result page for hint "大学" / action "入学".

    The Solr query is prepared with ``set_solr_query`` before
    ``find_related_action_words_from_clueweb`` is invoked.
    """
    engine = SearchEngine()
    engine.hint_word = "大学"
    engine.action_word = "入学"
    engine.set_solr_query()

    engine.find_related_action_words_from_clueweb()

    page_count = len(engine.result_pages)
    self.assertEqual(page_count, 1)
def test_find_related_action_words_from_clueweb(self):
    """Expect exactly one result page for hint '大学' / action '入学'.

    The Solr query is prepared with ``set_solr_query`` before
    ``find_related_action_words_from_clueweb`` is invoked.
    """
    engine = SearchEngine()
    engine.hint_word = '大学'
    engine.action_word = '入学'
    engine.set_solr_query()

    engine.find_related_action_words_from_clueweb()

    page_count = len(engine.result_pages)
    self.assertEqual(page_count, 1)
def search_in_clueweb_with_expanded_query():
    """Run one Solr search per expanded query and render the results.

    Reads ``action_word`` and ``hint_word`` from the submitted form, lets
    ``SearchEngine`` collect, count, sort and filter related action words,
    and then, for every entry of ``sorted_action_words_more_than_1_count``:

    * stores ``"<action_word> <hint_word> <word>"`` on the entry as
      ``expanded_query``;
    * fetches the Solr XML response for that query through ``WebPage``;
    * prepares each page in ``web_page.result_pages`` by calling its
      line-splitting and line-clustering setters in order.

    Returns:
        The rendered ``search_in_clueweb_with_expanded_query.tmpl``
        template, passed ``results``: a list of dicts with the keys
        ``'pages'`` and ``'expanded_query'``.
    """
    # Function-scope import so the block is self-contained and works on
    # both Python 3 and Python 2.
    try:
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import quote

    search_engine = SearchEngine()
    search_engine.action_word = request.form['action_word']
    search_engine.hint_word = request.form['hint_word']
    search_engine.find_related_action_words_with_google()
    search_engine.count_action_words()
    search_engine.sort_action_words_count()
    search_engine.pick_sorted_action_words_more_than_1_count()

    results = []
    for elem in search_engine.sorted_action_words_more_than_1_count:
        expanded_query = (search_engine.action_word + ' '
                          + search_engine.hint_word + ' ' + elem['word'])
        elem['expanded_query'] = expanded_query

        # The query contains user-supplied form text, spaces and non-ASCII
        # characters. Percent-encode it so characters such as '&' or '#'
        # cannot inject or truncate Solr parameters and the URL stays valid.
        if isinstance(expanded_query, bytes):
            raw_query = expanded_query
        else:
            raw_query = expanded_query.encode('utf-8')
        url = ('http://karen.dl.local:8983/solr/ClueWeb09ja/select?q='
               + quote(raw_query, safe='')
               + '&wt=xml')

        web_page = WebPage(url)
        web_page.fetch_xml()
        web_page.pick_texts_to_result_pages()
        # There is one result XML page per query.
        # Each page inside that result XML becomes its own WebPage object;
        # they are kept as result_pages on the page for the query.
        for result_page in web_page.result_pages:
            # NOTE(review): presumably works from result_page.text_body --
            # confirm against WebPage.
            result_page.set_lines_from_texts()
            result_page.set_line_nums_with_word(search_engine.action_word)
            result_page.set_line_nums_around_action_word()
            result_page.set_line_clusters_around_action_word()
        # NOTE(review): at this point e.g.
        # web_page.result_pages[0].line_clusters_around_action_word is
        # presumably populated -- confirm against WebPage.
        results.append({'pages': web_page.result_pages,
                        'expanded_query': expanded_query})

    return render_template('search_in_clueweb_with_expanded_query.tmpl',
                           results=results)
def test_clueweb_search(self):
    """Check that a ClueWeb search returns 50 texts mentioning both words.

    The query is built by ``set_solr_query`` from hint word "大学" and
    action word "入学"; the first returned text must contain both.
    """
    se = SearchEngine()
    se.hint_word = "大学"
    se.action_word = "入学"
    se.set_solr_query()
    texts = se.clue_web_search(se.solr_query)
    self.assertEqual(len(texts), 50)
    # The previous `"大学" and "入学" in texts[0]` parsed as
    # `"大学" and ("入学" in texts[0])`, so the hint word was never checked.
    # Assert each word separately.
    self.assertIn("大学", texts[0])
    self.assertIn("入学", texts[0])
def test_clueweb_search(self):
    """Check that a ClueWeb search returns 50 texts mentioning both words.

    The query is built by ``set_solr_query`` from hint word '大学' and
    action word '入学'; the first returned text must contain both.
    """
    se = SearchEngine()
    se.hint_word = '大学'
    se.action_word = '入学'
    se.set_solr_query()
    texts = se.clue_web_search(se.solr_query)
    self.assertEqual(len(texts), 50)
    # The previous `'大学' and '入学' in texts[0]` parsed as
    # `'大学' and ('入学' in texts[0])`, so the hint word was never checked.
    # Assert each word separately.
    self.assertIn('大学', texts[0])
    self.assertIn('入学', texts[0])
def find_related_action_words():
    """Find related action words for the submitted form and render them.

    Reads ``action_word`` and ``hint_word`` from the form, runs the
    ``SearchEngine`` find / count / sort steps, and stores
    ``"<action_word> <hint_word> <word>"`` as ``expanded_query`` on every
    entry of ``sorted_action_words``.

    Returns:
        The rendered ``find_related_action_words.tmpl`` template, passed
        ``items``, ``sorted_action_words``, ``found_pages`` and ``query``
        taken from the engine.
    """
    engine = SearchEngine()
    form = request.form
    engine.action_word = form['action_word']
    engine.hint_word = form['hint_word']

    engine.find_related_action_words()
    engine.count_action_words()
    engine.sort_action_words_count()

    for entry in engine.sorted_action_words:
        query_terms = (engine.action_word, engine.hint_word, entry['word'])
        entry['expanded_query'] = ' '.join(query_terms)

    template_args = {
        'items': engine.result_pages,
        'sorted_action_words': engine.sorted_action_words,
        'found_pages': engine.material_pages,
        'query': engine.actual_query,
    }
    return render_template('find_related_action_words.tmpl', **template_args)