Example #1
0
    def test_find_pages(self):
        """Exercise page filtering, action-word counting and count sorting.

        Three hand-built pages are given to a ``SearchEngine`` with hint word
        "自動車" and action word "下取り".  All three pages are expected back
        in their original order, and the related action words are expected
        to be counted as 販売 twice and 売却 once, sorted by descending count.
        """
        def build_page(url, title, snippet):
            # WebPage only takes the URL; title and snippet are set afterwards.
            page = WebPage(url)
            page.title = title
            page.snippet = snippet
            return page

        fixtures = (
            ("http://tradein.nissan.co.jp/",
             "自動車の下取りと売却",
             "自動車には下取りをする方法がけっこうある。"),
            ("http://www.link-nexus.com/",
             "自動車の下取りと販売",
             "あばばばばば"),
            ("http://toyota.jp/service/tradein/dc/top",
             "下取り参考価格情報",
             "下取りと販売ですよプロデューサーさん"),
        )
        expected_pages = tuple(build_page(*fixture) for fixture in fixtures)

        engine = SearchEngine()
        # Hand over a fresh list so the expectations stay independent of it.
        engine.material_pages = list(expected_pages)
        engine.hint_word = "自動車"
        engine.action_word = "下取り"
        engine.find_pages_including_related_words()
        for position, expected in enumerate(expected_pages):
            self.assertEqual(engine.result_pages[position], expected)

        engine.count_action_words()
        expected_counts = {"販売": 2, "売却": 1}
        self.assertEqual(engine.action_words_count, expected_counts)

        engine.sort_action_words_count()
        expected_ranking = [
            {"word": "販売", "count": 2},
            {"word": "売却", "count": 1},
        ]
        self.assertEqual(engine.sorted_action_words, expected_ranking)
Example #2
0
    def test_find_pages(self):
        """Verify the find / count / sort pipeline of ``SearchEngine``.

        The engine receives three fixture pages, the hint word '自動車' and
        the action word '下取り'.  The test then checks, step by step, that
        every page is kept in order, that the action-word counts are
        {'販売': 2, '売却': 1}, and that the sorted list ranks 販売 first.
        """
        # Fixture pages: each is created from its URL, then given a title
        # and a snippet.
        nissan_page = WebPage('http://tradein.nissan.co.jp/')
        nissan_page.title = '自動車の下取りと売却'
        nissan_page.snippet = '自動車には下取りをする方法がけっこうある。'

        link_nexus_page = WebPage('http://www.link-nexus.com/')
        link_nexus_page.title = '自動車の下取りと販売'
        link_nexus_page.snippet = 'あばばばばば'

        toyota_page = WebPage('http://toyota.jp/service/tradein/dc/top')
        toyota_page.title = '下取り参考価格情報'
        toyota_page.snippet = '下取りと販売ですよプロデューサーさん'

        engine = SearchEngine()
        engine.material_pages = [nissan_page, link_nexus_page, toyota_page]
        engine.hint_word = '自動車'
        engine.action_word = '下取り'

        # Step 1: all three pages come back, in the order they were supplied.
        engine.find_pages_including_related_words()
        found = engine
        self.assertEqual(found.result_pages[0], nissan_page)
        self.assertEqual(found.result_pages[1], link_nexus_page)
        self.assertEqual(found.result_pages[2], toyota_page)

        # Step 2: occurrences of each related action word.
        engine.count_action_words()
        expected_counts = {'販売': 2, '売却': 1}
        self.assertEqual(engine.action_words_count, expected_counts)

        # Step 3: the same counts as a list ordered by descending count.
        engine.sort_action_words_count()
        most_frequent = {'word': '販売', 'count': 2}
        least_frequent = {'word': '売却', 'count': 1}
        self.assertEqual(engine.sorted_action_words,
                         [most_frequent, least_frequent])
Example #3
0
 def test_find_related_action_words_from_clueweb(self):
     """Expect one result page for hint "大学" and action "入学".

     Builds the Solr query from the two words, runs the ClueWeb
     related-action-word lookup and checks the size of ``result_pages``.
     NOTE(review): presumably needs a reachable Solr/ClueWeb backend; confirm.
     """
     engine = SearchEngine()
     engine.hint_word = "大学"
     engine.action_word = "入学"
     engine.set_solr_query()
     engine.find_related_action_words_from_clueweb()

     page_total = len(engine.result_pages)
     self.assertEqual(page_total, 1)
Example #4
0
 def test_find_related_action_words_from_clueweb(self):
     """Check that the ClueWeb lookup yields exactly one result page.

     The engine is configured with hint word '大学' and action word '入学'
     before the Solr query is built and the lookup is executed.
     NOTE(review): looks like this depends on a live Solr index; confirm.
     """
     clueweb_engine = SearchEngine()
     clueweb_engine.hint_word = '大学'
     clueweb_engine.action_word = '入学'
     clueweb_engine.set_solr_query()
     clueweb_engine.find_related_action_words_from_clueweb()

     result_pages = clueweb_engine.result_pages
     self.assertEqual(len(result_pages), 1)
Example #5
0
def search_in_clueweb_with_expanded_query():
    """Search ClueWeb once per expanded query and render the grouped results.

    Reads ``action_word`` and ``hint_word`` from the submitted form, lets a
    ``SearchEngine`` find, count and sort related action words, and keeps the
    entries exposed as ``sorted_action_words_more_than_1_count``.  For each
    entry an expanded query "<action_word> <hint_word> <related word>" is
    stored on the entry and sent to the Solr ClueWeb09ja index.  Every page
    of the response is then prepared with its line clusters around the
    action word.

    Returns:
        The rendered ``search_in_clueweb_with_expanded_query.tmpl`` template.
        It receives ``results``, a list of dicts with the keys ``'pages'``
        (the result pages for one query) and ``'expanded_query'``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    search_engine = SearchEngine()
    search_engine.action_word = request.form['action_word']
    search_engine.hint_word = request.form['hint_word']
    search_engine.find_related_action_words_with_google()
    search_engine.count_action_words()
    search_engine.sort_action_words_count()
    search_engine.pick_sorted_action_words_more_than_1_count()

    solr_select_url = 'http://karen.dl.local:8983/solr/ClueWeb09ja/select'
    results = []
    for elem in search_engine.sorted_action_words_more_than_1_count:
        expanded_query = ' '.join(
            (search_engine.action_word, search_engine.hint_word, elem['word']))
        elem['expanded_query'] = expanded_query

        # The query contains user-supplied form text.  Percent-encode it so
        # spaces and non-ASCII characters form a valid URL and characters
        # such as '&' or '#' cannot inject extra Solr parameters.
        # NOTE(review): assumes WebPage does not re-quote the URL itself.
        query_string = urlencode([('q', expanded_query), ('wt', 'xml')])
        web_page = WebPage(solr_select_url + '?' + query_string)
        web_page.fetch_xml()
        web_page.pick_texts_to_result_pages()

        # Each query has one result XML page.  Every page listed in that XML
        # becomes its own WebPage object, and they are kept as result_pages
        # on the WebPage that corresponds to the query.
        for result_page in web_page.result_pages:
            result_page.set_lines_from_texts()
            result_page.set_line_nums_with_word(search_engine.action_word)
            result_page.set_line_nums_around_action_word()
            result_page.set_line_clusters_around_action_word()

        results.append({'pages': web_page.result_pages,
                        'expanded_query': expanded_query})
    return render_template('search_in_clueweb_with_expanded_query.tmpl',
        results=results)
Example #6
0
 def test_clueweb_search(self):
     """Check that a ClueWeb search returns 50 texts matching both words.

     The Solr query is built from hint word "大学" and action word "入学".
     The first returned text must contain both of them.
     """
     se = SearchEngine()
     se.hint_word = "大学"
     se.action_word = "入学"
     se.set_solr_query()
     texts = se.clue_web_search(se.solr_query)
     self.assertEqual(len(texts), 50)
     # The former `"大学" and "入学" in texts[0]` only tested "入学",
     # because a non-empty string on the left of `and` is always truthy.
     # Assert each word separately so both are really checked.
     self.assertIn("大学", texts[0])
     self.assertIn("入学", texts[0])
Example #7
0
 def test_clueweb_search(self):
     """Check that a ClueWeb search returns 50 texts matching both words.

     The Solr query is built from hint word '大学' and action word '入学'.
     The first returned text must contain both of them.
     """
     se = SearchEngine()
     se.hint_word = '大学'
     se.action_word = '入学'
     se.set_solr_query()
     texts = se.clue_web_search(se.solr_query)
     self.assertEqual(len(texts), 50)
     # The former `'大学' and '入学' in texts[0]` only tested '入学',
     # because a non-empty string on the left of `and` is always truthy.
     # Assert each word separately so both are really checked.
     self.assertIn('大学', texts[0])
     self.assertIn('入学', texts[0])
Example #8
0
def find_related_action_words():
    """Find related action words for the submitted form and render them.

    Reads ``action_word`` and ``hint_word`` from the request form, lets a
    ``SearchEngine`` find, count and sort the related action words, and adds
    an ``'expanded_query'`` string
    ("<action_word> <hint_word> <related word>") to every sorted entry.

    Returns:
        The rendered ``find_related_action_words.tmpl`` template, supplied
        with the result pages, the sorted action words, the material pages
        and the query the engine actually used.
    """
    engine = SearchEngine()
    engine.action_word = request.form['action_word']
    engine.hint_word = request.form['hint_word']

    engine.find_related_action_words()
    engine.count_action_words()
    engine.sort_action_words_count()

    for entry in engine.sorted_action_words:
        query_terms = (engine.action_word, engine.hint_word, entry['word'])
        entry['expanded_query'] = ' '.join(query_terms)

    template_context = {
        'items': engine.result_pages,
        'sorted_action_words': engine.sorted_action_words,
        'found_pages': engine.material_pages,
        'query': engine.actual_query,
    }
    return render_template('find_related_action_words.tmpl', **template_context)