コード例 #1
0
    def setUp(self):
        '''
        Write the fixture text files, index two of them and open the engine.

        Five text files are created in the working directory, but only
        'test_text.txt' and 'another_test_text.txt' are indexed into
        'database'.  The indexer is dropped before the SearchEngine is
        created on the same database name.
        '''
        indexer = ToIndex('database')
        self.maxDiff = None
        # The backslash continuations sit inside the string literals, so the
        # leading spaces of the continued lines are part of the written text.
        with open('test_text.txt', 'w') as text:
            text.write('Ах, не говорите мне про Австрию! \
                    Я ничего не понимаю, может быть')
        with open('another_test_text.txt', 'w') as another_text:
            another_text.write('но Ах Австрия никогда не хотела и не хочет войны.\
                            Она предает нас')

        # These three files are written but not indexed here.
        extra_fixtures = (
            ('test_text1.txt', 'ooh la la мама мыла раму123  frf34'),
            ('test_text2.txt', 'мама мыла окно'),
            ('test_text3.txt', 'мама мыла еще что-нибудь'),
        )
        for file_name, content in extra_fixtures:
            with open(file_name, 'w') as extra_text:
                extra_text.write(content)

        indexer.index_by_line('test_text.txt')
        indexer.index_by_line('another_test_text.txt')
        # NOTE(review): presumably dropping the indexer releases the database
        # before SearchEngine opens it - confirm against ToIndex.
        del indexer

        self.search_eng = SearchEngine('database')
コード例 #2
0
 def print_result(self):
     """Show the results for the text in the entry widget in a new window.

     Reads the query from ``self.entry``, runs it through a SearchEngine
     built on ``self.db`` and fills a 500x500 Toplevel with a heading label
     followed by one clickable label per result.
     """
     self.query = self.entry.get()
     engine = SearchEngine(self.db)
     self.top_level = Toplevel(self.tk)
     self.top_level.geometry("500x500")
     self.top_level.title("The Result")
     result = engine.print_results(self.query)
     return_doc = engine.get_return_doc()

     # Pick the heading text first, then build a single label from it.
     if len(self.query) == 0:
         heading = "Please enter your text again"
     elif len(result) == 0:
         heading = "No result found"
     else:
         heading = ("These are the top 20 results for query " +
                    self.query + ": ")
     label = Label(self.top_level, text=heading)
     label.pack()

     for position, single_result in enumerate(result):
         # return_doc is indexed in parallel with result to find the snippet.
         caption = (single_result + "  Its snippet: " +
                    self.snippet_dict[return_doc[position]] + "...")
         result_label = Label(self.top_level,
                              text=caption,
                              fg="blue",
                              cursor="hand2")
         result_label.pack()
         result_label.bind("<Button-1>", self.show_content)
コード例 #3
0
ファイル: __main__.py プロジェクト: ygorcanalli/bri-2015-01
def __main__(argv):
    """Run every stage of the retrieval pipeline once, in order.

    Each stage is constructed from its config-file constant, run, and asked
    to write its output before the next stage is built.  ``argv`` is not
    used by the body.
    """
    #%%
    logger = logging.getLogger(__name__)
    logger.info("VECTOR MODEL INFORMATION RETRIEVAL SYSTEM START")

    # Stage 1: inverted index generation.
    inverted_index_generator = InvertedIndexGenerator(GLI_CONFIG_FILE)
    inverted_index_generator.run()
    inverted_index_generator.write_output()

    # Stage 2: indexing.
    indexer = Indexer(INDEX_CONFIG_FILE, TfidfVectorizer)
    indexer.run()
    indexer.write_output()

    # Stage 3: query processing.
    query_processor = QueryProcessor(PC_CONFIG_FILE)
    query_processor.run()
    query_processor.write_output()

    # Stage 4: searching.
    searcher = SearchEngine(BUSCA_CONFIG_FILE, TfidfVectorizer)
    searcher.run()
    searcher.write_output()
    #%%
    # Stage 5: evaluation.
    evaluator = Evaluator(AVAL_CONFIG_FILE)
    evaluator.run()
    evaluator.write_output()

    logger.info("VECTOR MODEL INFORMATION RETRIEVAL SYSTEM DONE")
コード例 #4
0
ファイル: hw4_test.py プロジェクト: danielqiang/cse163
def test_search_engine():
    """Check SearchEngine.search on the 'test_search' directory.

    Covers a term with no hits (None), a term found in one file, and a term
    found in two files.
    """
    engine = SearchEngine('test_search')
    cases = (
        ('asdf', None),
        ('dog', ['test_search/file.txt']),
        ('test', ['test_search/file.txt', 'test_search/file1.txt']),
    )
    for term, expected in cases:
        assert_equals(expected, engine.search(term))
コード例 #5
0
    def test_find_pages(self):
        """Run page finding, action-word counting and sorting on three pages."""
        fixtures = (
            ("http://tradein.nissan.co.jp/",
             "自動車の下取りと売却",
             "自動車には下取りをする方法がけっこうある。"),
            ("http://www.link-nexus.com/",
             "自動車の下取りと販売",
             "あばばばばば"),
            ("http://toyota.jp/service/tradein/dc/top",
             "下取り参考価格情報",
             "下取りと販売ですよプロデューサーさん"),
        )
        pages = []
        for url, title, snippet in fixtures:
            page = WebPage(url)
            page.title = title
            page.snippet = snippet
            pages.append(page)

        search_engine = SearchEngine()
        search_engine.material_pages = pages
        search_engine.hint_word = "自動車"
        search_engine.action_word = "下取り"
        search_engine.find_pages_including_related_words()
        # All three pages are expected back, in their original order.
        for position, page in enumerate(pages):
            self.assertEqual(search_engine.result_pages[position], page)

        search_engine.count_action_words()
        expected_counts = {"販売": 2, "売却": 1}
        self.assertEqual(search_engine.action_words_count, expected_counts)

        search_engine.sort_action_words_count()
        expected_sorted = [
            {"word": "販売", "count": 2},
            {"word": "売却", "count": 1},
        ]
        self.assertEqual(search_engine.sorted_action_words, expected_sorted)
コード例 #6
0
ファイル: main.py プロジェクト: 1160300901/search-engine
def searchidlist(key, selected=0):
    """Search for ``key`` and keep only the hits allowed for the current rank.

    Runs the query through a SearchEngine, then looks every hit up in the
    ``news`` table and keeps the ids whose sixth column is below the global
    ``rank``.  The filtered ids are stored in the global ``doc_id`` and the
    page numbers in the global ``page``.

    Args:
        key: query passed to ``SearchEngine.search``.
        selected: second argument passed to ``SearchEngine.search``.

    Returns:
        ``(flag, page)``: the flag returned by the engine and the list of
        page numbers ``1 .. len(doc_id) // 10 + 1``.
    """
    global page
    global doc_id
    global dir_path, db_path, rank
    se = SearchEngine('../config.ini', 'utf-8')
    flag, id_scores = se.search(key, selected)
    # Keep only the doc ids from the (id, score) pairs.
    doc_id = [i for i, s in id_scores]

    # TODO: filter out part of the data according to the user's level.
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        doc_id_rank = []
        for news_id in doc_id:
            c.execute("SELECT * FROM news WHERE id=?", (news_id, ))
            fetch = c.fetchone()
            # A hit with no row in `news` is skipped instead of crashing on
            # None[5].  NOTE(review): column 5 is presumably the row's access
            # level - confirm against the table schema.
            if fetch is not None and fetch[5] < rank:
                doc_id_rank.append(news_id)
    finally:
        # Always release the connection, even if a query fails.
        conn.close()
    doc_id = doc_id_rank
    print("**rank**: ", rank)
    print("**doc_id**: ", doc_id)
    page = list(range(1, len(doc_id) // 10 + 2))
    return flag, page
コード例 #7
0
def main():
    """Run the interactive prompt: optional score build, then a query loop.

    Commands understood by the loop:
      * ``/quit`` (or ``quit``) ends the session;
      * ``-run <path>`` runs a ScheduleSimulator on the given file;
      * anything else is sent to the search engine and the formatted
        results are printed.
    """
    generator = SearchEngineGenerator()
    search = SearchEngine()

    # Optionally (re)build the scores before serving queries.
    ans = input('Do you want to build scores?[type \'y\' to build or pass]: ')
    if ans == 'y':
        generator()

    while True:
        query = input('2018-26161> ')
        # Compare whole commands: a substring test against '/quit' would
        # also quit on an empty line or on queries such as 'q', 'u' or 'it'.
        if query in ('/quit', 'quit'):
            print("Okay bye!")
            break
        if query.startswith('-run'):
            arguments = query.split()
            # '-run' without a path is reported like any other bad path.
            path = arguments[1] if len(arguments) > 1 else ''
            if not os.path.exists(path):
                print("경로가 올바르지 않습니다.")
                continue
            simulator = ScheduleSimulator(path)
            simulator()
            # The command has been handled; do not search for its text.
            continue

        results = search(query)

        for doc in results:
            print(search.result_formatting(doc))
コード例 #8
0
ファイル: data_enhancer.py プロジェクト: xkuang/Digestant
    def enhance(self, num=10):
        """Attach search results and per-domain groupings to ``self.df``.

        Queries a SearchEngine with the 'cleaned_text' column, stores the
        matching (url, text) pairs in a 'google-search' column, groups them
        by the domain type reported by ``self.in_domain`` into a 'types'
        column, and returns ``self.df``.

        ``num`` is not used by the body.
        NOTE(review): ``self.df`` is presumably a pandas DataFrame - confirm.
        """
        # Imported locally rather than at module level.
        from search_engine import SearchEngine
        se = SearchEngine()

        print("*[Data Organizer] Downloading Google Search Results")
        results = se.get_data(queries=self.df['cleaned_text'])

        # Reshape each (source_text, {'url': [...], 'text': [...]}) item into
        # {'source_text': ..., 'result': [(url, text), ...]}.
        results_t = []
        d = {}
        for r in results.items():
            tu = []
            for url, text in zip(r[1]['url'], r[1]['text']):
                tu.append((url, text))
            d = {'source_text': r[0], 'result': tu}
            results_t.append(d)

        # Seed the column with the row numbers, then overwrite each row whose
        # cleaned text equals (as a string) a result's source text.  Rows
        # without a match keep their integer placeholder.
        self.df['google-search'] = [i for i in range(len(self.df))]
        for d, i in zip(self.df['cleaned_text'], range(len(self.df))):
            for r in results_t:
                if str(r['source_text']) == str(d):
                    self.df['google-search'][i] = r['result']

        # For every row build {domain: [results in that domain]}; results
        # for which in_domain returns "" are dropped.
        q = []
        for gs in self.df['google-search']:
            types = dict((el, []) for el in list(self.domains))
            for result in gs:
                _type = self.in_domain(result[0])
                if _type != "": types[_type].append(result)
            q.append(types)

        self.df['types'] = q
        return self.df
コード例 #9
0
def search(query, count):
    """Run ``query`` on the engine named by the submitted form.

    The 'search_engine' form field selects between 'google' and 'bing'.

    Args:
        query: search query passed to the engine.
        count: second argument passed to the engine's search method.

    Returns:
        The pages returned by the selected engine.

    Raises:
        ValueError: if the form names neither 'google' nor 'bing'.
    """
    search_engine = request.forms.get('search_engine')
    engine = SearchEngine()
    if search_engine == 'google':
        return engine.google_search(query, count)
    if search_engine == 'bing':
        return engine.bing_search(query, count)
    # Fail with a clear message instead of an UnboundLocalError on `pages`.
    raise ValueError('unsupported search engine: %r' % (search_engine,))
コード例 #10
0
def test_search_engine_3():
    """Check a SearchEngine built on 'test_dir3' (the empty-directory case)."""
    empty_engine = SearchEngine('test_dir3')

    # Internal state of the engine.
    assert_equals(1, empty_engine._num_docs)
    assert_equals({}, empty_engine._docs)
    # Behaviour for a term that cannot be present.
    assert_equals(0, empty_engine._calculate_idf('Samsung'))
    assert_equals(None, empty_engine.search('Samsung'))
コード例 #11
0
ファイル: app.py プロジェクト: katryo/subtask_search
def search(query, count):
    """Run ``query`` on the engine named by the submitted form.

    The 'search_engine' form field selects between 'google' and 'bing'.

    Args:
        query: search query passed to the engine.
        count: second argument passed to the engine's search method.

    Returns:
        The pages returned by the selected engine.

    Raises:
        ValueError: if the form names neither 'google' nor 'bing'.
    """
    search_engine = request.forms.get('search_engine')
    engine = SearchEngine()
    if search_engine == 'google':
        return engine.google_search(query, count)
    if search_engine == 'bing':
        return engine.bing_search(query, count)
    # Fail with a clear message instead of an UnboundLocalError on `pages`.
    raise ValueError('unsupported search engine: %r' % (search_engine,))
コード例 #12
0
def query_db(query, tables, Descriptions):
    """Build documents from the given sources and search them for ``query``.

    Args:
        query: query passed to ``SearchEngine.search``.
        tables: input to ``get_codes``.
        Descriptions: input to ``get_descriptions``.

    Returns:
        Whatever ``SearchEngine.search`` returns for the query.
    """
    documents = generate_documents(get_codes(tables),
                                   get_descriptions(Descriptions))
    return SearchEngine(documents).search(query)
コード例 #13
0
def start():
    """Answer user requests from the console until 'exit' is entered.

    Each request is searched in a SearchEngine built on "data/" and the
    answer is shown with ``print_result``.
    """
    searchEngine = SearchEngine("data/")

    print("For finish write 'exit'")
    # Read the first request, then keep answering until the user types exit.
    request = input("User: ")
    while request != "exit":
        answer = searchEngine.search(request)
        print_result(answer)
        request = input("User: ")
コード例 #14
0
def search_by_google_or_bing(request):
    """Search with Google when the form asks for it, otherwise with Bing.

    Args:
        request: object whose ``form`` holds 'query' and 'search_engine'.

    Returns:
        The pages returned by the chosen engine (called with count 1).
    """
    query = request.form["query"]
    search_engine_name = request.form['search_engine']
    search_engine = SearchEngine()
    if search_engine_name != 'google':
        # Anything that is not 'google' is served by Bing.
        return search_engine.bing_search(query, 1)
    return search_engine.google_search(query, 1)
コード例 #15
0
ファイル: search_tests.py プロジェクト: anakru07/proga
 def setUp(self):
     """Create the engine, load the fixture database and write both files.

     ``database``, ``test1`` and ``test2`` are fixtures defined outside
     this method.
     """
     self.engine = SearchEngine('database')
     self.engine.database.update(database)
     # `with` closes each file even if the write fails.
     with open("test1.txt", 'w') as test:
         test.write(test1)
     with open("test2.txt", 'w') as test:
         test.write(test2)
コード例 #16
0
class TestSearchEngine(unittest.TestCase):
    '''Tests for SearchEngine.search on a one-file database.'''

    def setUp(self):
        '''
        Create an indexer, write a text file and index it, drop the
        indexer, then create an object of SearchEngine().
        The text file itself is removed in tearDown.
        '''
        indexer = ToIndex('database')
        self.maxDiff = None
        # `with` closes the file even if the write fails.
        with open('test_text.txt', 'w') as text:
            text.write('mama мыла ramu')
        indexer.index_by_line('test_text.txt')
        del indexer
        self.search_eng = SearchEngine('database')

    def tearDown(self):
        '''
        Destroy the SearchEngine() object, delete 'database' together with
        every 'database.*' file, and delete the text file.
        '''
        del self.search_eng
        for single_file in os.listdir():
            if single_file == "database" or single_file.startswith('database.'):
                os.remove(single_file)
        os.remove('test_text.txt')

    def test_empty_query(self):
        '''
        Test that ValueError is raised if the query is an empty string.
        '''
        with self.assertRaises(ValueError):
            self.search_eng.search("")

    def test_query_is_a_number(self):
        """
        If the query is a number raise TypeError.
        """
        with self.assertRaises(TypeError):
            self.search_eng.search(42)

    def test_program_runs_okay(self):
        '''
        Test that program runs as expected given there is one word in the query
        and one file in the database.
        '''
        search_res = self.search_eng.search('мыла')
        ref_dict = {'test_text.txt': [PositionByLine(5, 9, 0)]}
        self.assertEqual(ref_dict, search_res)
コード例 #17
0
def find_related_action_words():
    """Find, count and sort related action words, then render the result page.

    The action word and hint word come from the submitted form.  Every
    sorted action-word entry gets an 'expanded_query' made of the action
    word, the hint word and the entry's own word, separated by spaces.
    """
    engine = SearchEngine()
    engine.action_word = request.form['action_word']
    engine.hint_word = request.form['hint_word']
    engine.find_related_action_words()
    engine.count_action_words()
    engine.sort_action_words_count()
    for entry in engine.sorted_action_words:
        entry['expanded_query'] = ' '.join(
            (engine.action_word, engine.hint_word, entry['word']))
    return render_template(
        'find_related_action_words.tmpl',
        items=engine.result_pages,
        sorted_action_words=engine.sorted_action_words,
        found_pages=engine.material_pages,
        query=engine.actual_query)
コード例 #18
0
class QASystem(object):
    """Question answering built on a SearchEngine and a Predictor."""

    def __init__(self):
        """Load the QA database, from 'database.dat' when it exists."""
        if not exists('database.dat'):
            # No serialized database yet: parse the text file (slower) and
            # save the result for the next start.
            print('load from QA database from txt format...')
            self.search_engine = SearchEngine('cc/cppjieba/dict')
            self.search_engine.loadFromTxt('question_answer.txt')
            self.search_engine.save('database.dat')
        else:
            # Reading the serialized database is the fast path.
            print('deserialize the QA database...')
            self.search_engine = SearchEngine('cc/cppjieba/dict',
                                              'database.dat')
        self.predictor = Predictor()

    def query(self, question, count=3):
        """Score the engine's candidate answers for ``question``.

        Returns a dict mapping each answer to a pair: the log of the match
        score (clamped below at the smallest positive float) multiplied by
        the predictor's relevance, and the second element of the engine's
        match entry unchanged.
        """
        smallest_positive = sys.float_info.min
        totals = dict()
        candidates = self.search_engine.query(question, count)
        for answer, match in candidates.items():
            _, relevance = self.predictor.predict(question, answer)
            # Clamping keeps log() defined when the match score is 0.
            clamped_score = max(match[0], smallest_positive)
            totals[answer] = (log(clamped_score) * relevance, match[1])
        return totals

    def updateDB(self, file):
        """Load more data from the text file ``file`` and re-save the database."""
        assert type(file) is str
        engine = self.search_engine
        engine.loadFromTxt(file)
        engine.save('database.dat')
コード例 #19
0
class QASystem(object):
    """Question answering built on a SearchEngine and a Predictor."""

    def __init__(self):
        """Load the QA database, from 'database.dat' when it exists."""
        if exists('database.dat'):
            # Deserializing the saved database is much faster.
            print('deserialize the QA database...')
            self.search_engine = SearchEngine('cppjieba/dict', 'database.dat')
        else:
            # Loading the database from the text file is slower, so the
            # result is saved for the next start.
            print('load from QA database from txt format...')
            self.search_engine = SearchEngine('cppjieba/dict')
            self.search_engine.loadFromTxt('question_answer.txt')
            self.search_engine.save('database.dat')
        self.predictor = Predictor()

    def query(self, question, count=3):
        """Score the engine's candidate answers for ``question``.

        Args:
            question: the question to answer.
            count: second argument passed to the engine's ``query``.

        Returns:
            A dict mapping each answer to ``exp(match) + exp(relevance)``,
            with entries in descending order of that total score.
        """
        answer_scores = self.search_engine.query(question, count)
        answer_totalscores = dict()
        for answer, match in answer_scores.items():
            _, relevance = self.predictor.predict(question, answer)
            answer_totalscores[answer] = exp(match) + exp(relevance)
        # Sort the (answer, score) pairs by score and rebuild the dict, which
        # keeps that order.  sorted() returns a new list, so its result must
        # be used, and the items (not the keys) must be sorted.
        ranked = sorted(answer_totalscores.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
        return dict(ranked)

    def updateDB(self, file):
        """Load more data from the text file ``file`` and re-save the database."""
        assert type(file) is str
        self.search_engine.loadFromTxt(file)
        self.search_engine.save('database.dat')
コード例 #20
0
ファイル: main.py プロジェクト: 01joy/news_search_engine
def searchidlist(key, selected=0):
    """Search for ``key`` and publish the hit ids and page numbers.

    The ids of the hits are stored in the global ``doc_id`` and the page
    numbers ``1 .. len(doc_id) // 10 + 1`` in the global ``page``.

    Returns:
        ``(flag, page)``: the flag returned by the engine and the page list.
    """
    global page
    global doc_id
    engine = SearchEngine('../config.ini', 'utf-8')
    flag, id_scores = engine.search(key, selected)
    # Keep only the ids from the (id, score) pairs.
    doc_id = [hit_id for hit_id, _score in id_scores]
    # NOTE(review): an exact multiple of 10 ids yields one extra page number
    # (10 ids -> [1, 2]) - confirm this is intended.
    last_page = len(doc_id) // 10 + 1
    page = list(range(1, last_page + 1))
    return flag, page
コード例 #21
0
ファイル: hw4_test.py プロジェクト: AdamK42/cse163-homework
def test_searchengine_search():
    '''
    Checks search() for a one-file hit, a miss and a two-word query.
    '''
    engine = SearchEngine(DIRECTORY)

    # A term that appears in a single file.
    assert_equals([FILE1], engine.search('super'))
    # A term that appears nowhere.
    assert_equals(None, engine.search('croissant'))
    # A multi-word query.
    assert_equals([FILE1, FILE2], engine.search('Apple pie'))
コード例 #22
0
def searchidlist(key, selected=0):
    """Search for ``key`` and publish the hit ids and page numbers.

    The ids of the hits are stored in the global ``doc_id`` and the page
    numbers ``1 .. len(doc_id) // 10 + 1`` in the global ``page``.

    Returns:
        ``(flag, page)``: the flag returned by the engine and the page list.
    """
    global page
    global doc_id
    engine = SearchEngine('../config.ini', 'utf-8')
    flag, id_scores = engine.search(key, selected)
    # Keep only the ids from the (id, score) pairs.
    doc_id = [hit_id for hit_id, _score in id_scores]
    # NOTE(review): an exact multiple of 10 ids yields one extra page number
    # (10 ids -> [1, 2]) - confirm this is intended.
    last_page = len(doc_id) // 10 + 1
    page = list(range(1, last_page + 1))
    return flag, page
コード例 #23
0
 def setUp(self):
     """Write two small text files, index both into 'dbase', open the engine."""
     index = indexer.Indexer('dbase')
     # `with` closes each file even if the write fails.
     with open('test.txt', 'w') as f:
         f.write('this is\ntest')
     with open('tst.txt', 'w') as t:
         t.write('test')
     index.indexing_with_lines('test.txt')
     index.indexing_with_lines('tst.txt')
     # NOTE(review): presumably dropping the indexer releases the database
     # before SearchEngine opens it - confirm against Indexer.
     del index
     self.s = SearchEngine('dbase')
コード例 #24
0
 def setUp(self):
     """Write two text files, index both into 'dbase', open the engine."""
     index = indexer.Indexer('dbase')
     # `with` closes each file even if a write fails.
     with open('test.txt', 'w') as f:
         f.write('this is a test required for helping students create a test\n')
         f.write(' professor required to write a test first')
     with open('tst.txt', 'w') as t:
         t.write('test is required. On the other hand...')
     index.indexing_with_lines('test.txt')
     index.indexing_with_lines('tst.txt')
     # NOTE(review): presumably dropping the indexer releases the database
     # before SearchEngine opens it - confirm against Indexer.
     del index
     self.s = SearchEngine('dbase')
コード例 #25
0
    def __init__(self):
        """Load the QA database, from 'database.dat' when it exists."""
        if not exists('database.dat'):
            # No serialized database yet: parse the text file (slower) and
            # save the result for the next start.
            print('load from QA database from txt format...')
            engine = SearchEngine('cppjieba/dict')
            self.search_engine = engine
            engine.loadFromTxt('question_answer.txt')
            engine.save('database.dat')
        else:
            # Reading the serialized database is the fast path.
            print('deserialize the QA database...')
            self.search_engine = SearchEngine('cppjieba/dict', 'database.dat')
        self.predictor = Predictor()
コード例 #26
0
 def test_find_related_action_words_from_clueweb(self):
     """Expect exactly one result page for the hint/action word pair."""
     engine = SearchEngine()
     engine.hint_word = '大学'
     engine.action_word = '入学'
     engine.set_solr_query()
     engine.find_related_action_words_from_clueweb()
     page_count = len(engine.result_pages)
     self.assertEqual(page_count, 1)
コード例 #27
0
 def setUp(self):
     '''
     Create an indexer, write a text file and index it, drop the
     indexer, then create an object of SearchEngine().
     The text file is not deleted here.
     '''
     indexer = ToIndex('database')
     self.maxDiff = None
     # `with` closes the file even if the write fails.
     with open('test_text.txt', 'w') as text:
         text.write('mama мыла ramu')
     indexer.index_by_line('test_text.txt')
     del indexer
     self.search_eng = SearchEngine('database')
コード例 #28
0
ファイル: hw4_test.py プロジェクト: AdamK42/cse163-homework
def test_searchengine_fields():
    '''
    Checks the document count and the term index of a freshly built engine.
    '''
    engine = SearchEngine(DIRECTORY)
    first, second, third = Document(FILE1), Document(FILE2), Document(FILE3)

    assert_equals(3, engine._total_documents)

    # Every term mapped to the documents expected to contain it.
    expected_terms = {
        'i': [first, second, third],
        'like': [first, second],
        'apple': [first],
        'pie': [first, second],
        'is': [first, third],
        'super': [first],
        'duper': [first],
        'cool': [first, third],
        'also': [second],
        'chocolate': [second, third],
        'cake': [third],
        'guess': [third],
    }

    assert_equals(expected_terms, engine._all_terms)
コード例 #29
0
 def test_find_related_action_words_from_clueweb(self):
     """Expect exactly one result page for the hint/action word pair."""
     engine = SearchEngine()
     engine.hint_word = "大学"
     engine.action_word = "入学"
     engine.set_solr_query()
     engine.find_related_action_words_from_clueweb()
     page_count = len(engine.result_pages)
     self.assertEqual(page_count, 1)
コード例 #30
0
ファイル: search_tests.py プロジェクト: anakru07/proga
class TestSearchEngine(unittest.TestCase):
    """Tests for single-token and multi-token search."""

    def setUp(self):
        """Create the engine, load the fixture database and write both files.

        ``database``, ``test1`` and ``test2`` are fixtures defined outside
        this class.
        """
        self.engine = SearchEngine('database')
        self.engine.database.update(database)
        # `with` closes each file even if the write fails.
        with open("test1.txt", 'w') as test:
            test.write(test1)
        with open("test2.txt", 'w') as test:
            test.write(test2)

    def test_empty(self):
        """An empty token yields an empty result."""
        result = self.engine.single_token_search('')
        self.assertEqual(result, {})

    def test_search_one(self):
        """A single token is located in the one file that contains it."""
        result = self.engine.single_token_search('for')
        self.assertEqual(result, {'test2.txt': [(15, 18)]})

    def test_search_many_one(self):
        """A one-word multi-token query finds the word in both files."""
        result = self.engine.multiple_tokens_search('testing')
        self.assertEqual(
            result, {
                'test1.txt': [Position_with_lines(11, 18, 0)],
                'test2.txt': [Position_with_lines(0, 7, 0)]
            })

    def test_search_many_two(self):
        """A two-word query returns the positions of both words per file."""
        result = self.engine.multiple_tokens_search('testing ground')
        self.assertEqual(
            result, {
                'test1.txt': [
                    Position_with_lines(11, 18, 0),
                    Position_with_lines(19, 24, 0)
                ],
                'test2.txt':
                [Position_with_lines(0, 7, 0),
                 Position_with_lines(8, 14, 0)]
            })

    def tearDown(self):
        """Remove the fixture files if they are still present."""
        for file_name in ('test1.txt', 'test2.txt'):
            if os.path.exists(file_name):
                os.remove(file_name)
コード例 #31
0
ファイル: database.py プロジェクト: pvt2345/KMS_IDRec
    def search(self, key_search):
        """Return up to 20 stored documents ranked against ``key_search``.

        A row is considered only if its content has at least 4 non-blank
        characters and contains at least one of the query's words.  Its
        score is the position-weighted sum of the title scores plus twice
        the best sentence score, both obtained from
        ``SearchEngine.LCS4Sentence``.  Rows scoring below 0.3 are dropped.

        Args:
            key_search: the query string.

        Returns:
            A list of dicts with keys 'reference', 'title', 'content',
            'score' and 'index', sorted by descending score.
        """
        data = self.select('content')
        searcher = SearchEngine(key_search)
        result = []
        # Escape every query word so it is matched literally: regex
        # metacharacters in user input must not change or break the pattern.
        # The pattern does not depend on the row, so compile it once.
        keyword_pattern = re.compile(
            '|'.join(re.escape(word) for word in key_search.lower().split()))

        for d in data:
            content = d.get('content', ' ')
            if len(content.strip()) < 4:
                continue
            if keyword_pattern.search(content.lower()) is None:
                continue

            titles = d.get('tieu_de', ' ').split('|')
            score = 0
            try:
                # Later titles in the '|'-separated list weigh more.
                for i, title in enumerate(titles):
                    score_tieu_de, _ = searcher.LCS4Sentence(u'' + title)
                    score += (i + 1) / len(titles) * score_tieu_de * 2

                # Keep the best score over the '.'-separated sentences.
                score_content = 0
                n_content = 0  # never incremented, so the divisor below is 1
                for sentence in content.split('.'):
                    s_content, _ = searcher.LCS4Sentence(u'' + sentence)
                    if s_content > score_content:
                        score_content = s_content
                score_content = 2 * score_content / (n_content + 1)
                # The reported index ranges come from the whole content.
                _, index = searcher.LCS4Sentence(u'' + content)
                score += score_content
                if score < 0.3:
                    continue
                index = ';'.join(['{0}-{1}'.format(s, e) for s, e in index])
                reference = d.get('reference')  # d['stt']

                result.append({
                    'reference': reference,
                    # str.split always yields at least one element, so the
                    # last title is always available.
                    'title': titles[-1],
                    'content': content,
                    'score': score,
                    'index': index
                })
            except Exception as e:
                # Best effort: report the failing row and carry on with a
                # fresh searcher.  NOTE(review): presumably the searcher can
                # be left in a bad state after an error - confirm.
                print(e)
                searcher = SearchEngine(key_search)
                print('error')
        result = [r for r in result if r['score'] > 0.05]
        result = sorted(result, key=lambda r: r['score'], reverse=True)
        result = result[:20]
        return result
コード例 #32
0
 def test_clueweb_search(self):
     """Expect 50 texts, the first of which contains both query words."""
     se = SearchEngine()
     se.hint_word = '大学'
     se.action_word = '入学'
     se.set_solr_query()
     texts = se.clue_web_search(se.solr_query)
     self.assertEqual(len(texts), 50)
     # Check each word on its own: `'a' and 'b' in text` only tests 'b'.
     self.assertIn('大学', texts[0])
     self.assertIn('入学', texts[0])
コード例 #33
0
ファイル: queue_service.py プロジェクト: alfinoc/queue
   def get_all_search_results(self, request, **values):
      """Query the requested services and render the combined results.

      Query-string arguments read from ``request.args``:
        q       -- the search query (required);
        limit   -- integer result limit, default 10;
        service -- 'soundcloud', 'youtube' or 'all' (default).

      ``values`` is not used by the body.

      Raises:
         BadRequest: if 'q' is missing.

      Returns:
         The rendered 'results.txt' template, or a BadRequest response
         when 'limit' is not an integer.
      """
      # `in` works on Python 2 and 3; dict.has_key() was removed in Python 3.
      if 'q' not in request.args:
         raise BadRequest('please provide a search query \'q\'')
      engine = SearchEngine()
      try:
         query = request.args['q']
         limit = int(request.args.get('limit', 10))
         service = request.args.get('service', 'all')
      except (TypeError, ValueError):
         return BadRequest('error parsing request. make sure limit is an integer')

      sc_res = []
      yt_res = []
      # query each of the services provided by the engine
      if service in ('all', 'soundcloud'):
         sc_res = engine.soundcloud_query(query, limit)
      if service in ('all', 'youtube'):
         yt_res = engine.youtube_query(query, limit)
      return self.render_template('results.txt', results=sc_res + yt_res)
コード例 #34
0
ファイル: main.py プロジェクト: harjotd/DocFinder
def main():
    """Prompt for search terms and print the ranked documents for each.

    After every search the user is asked whether to continue; the loop ends
    on 'n'.
    """
    print("Building SearchEngine")
    engine = SearchEngine()

    answer = 'y'
    while answer == 'y':
        term = input('Enter Search Term:')
        ranking = engine.search(term)
        print("Displaying results for " + "'" + term + "':")
        if ranking is None:
            print("No results")
        else:
            # Only iterate when there is a ranking: looping over None would
            # raise TypeError right after "No results" was printed.
            for rank, doc in enumerate(ranking, start=1):
                print('    ' + str(rank) + '. ' + doc)
        print()
        # Keep asking until the answer is exactly 'y' or 'n'.
        answer = ''
        while not (answer == 'y' or answer == 'n'):
            answer = input('Would you like to search another term (y/n) ')
コード例 #35
0
def main():
    """Build the four test documents and the engine, then run every test."""
    document_paths = (
        'test_docs/test1.txt',
        'test_docs/test2.txt',
        'test_docs/test3.txt',
        'test_docs/test4.txt',
    )
    documents = [Document(path) for path in document_paths]
    engine = SearchEngine('test_docs')

    test_document(*documents)
    test_single(engine)
    test_mulit(engine)
コード例 #36
0
def search_service(request):
    """
    Parses http json request and returns list of articles ordered by
    their scores for the input query
    Args:
        request: http POST body request as json
    Returns:
        {"articles": [...]} with the articles in descending score order,
        or {"error": {...}} when the body has no 'query' field or the
        search raises
    """
    body = request.get_json()
    logging.info(body)

    # A missing or non-object body is reported like a missing 'query'
    # field instead of crashing on the membership test.
    if not isinstance(body, dict) or "query" not in body:
        message = "ValueError: Expected 'query' field in json body missing"
        error = {"error": {"message": message}}
        logging.error(message)
        return error

    query = body["query"]
    search = SearchEngine(keywords_weight=constants.KEYWORDS_WEIGHT)
    try:
        score_per_article = search.query(query)
    except Exception as e:
        error = {
            "error": {
                "message": getattr(e, 'message', str(e)),
                "trace": traceback.format_exc()
            }
        }
        logging.error(error['error'])
        return error

    # sorts dictionary by value in DESC order
    articles_sorted = [
        k for k, v in sorted(
            score_per_article.items(), key=lambda item: item[1], reverse=True)
    ]

    response = {"articles": articles_sorted}

    logging.info(response)
    return response
コード例 #37
0
 def test_clueweb_search(self):
     """Expect 50 texts, the first of which contains both query words."""
     se = SearchEngine()
     se.hint_word = "大学"
     se.action_word = "入学"
     se.set_solr_query()
     texts = se.clue_web_search(se.solr_query)
     self.assertEqual(len(texts), 50)
     # Check each word on its own: `"a" and "b" in text` only tests "b".
     self.assertIn("大学", texts[0])
     self.assertIn("入学", texts[0])
コード例 #38
0
ファイル: fetch_html.py プロジェクト: katryo/task_search
def search_and_fetch_30_pages():
    """Search Google for QUERY, fetch the HTML of every page, return the pages."""
    pages = SearchEngine().google_search(QUERY, 3)
    # Fetching is done for its side effect on each page object.
    for page in pages:
        page.fetch_html()
    return pages
コード例 #39
0
from search_engine import SearchEngine

if __name__ == '__main__':
    # Search for the exact phrase 'で' + query and print, for every page,
    # the text from 10 characters before the phrase onwards.  The title is
    # tried first, the snippet second.
    query = '花粉症対策'
    engine = SearchEngine()
    pages = engine.google_search('"' + 'で' + query + '"', 3)
    for page in pages:
        try:
            i = page.title.index('で' + query)
            # Clamp at 0: a negative start would slice from the end of the
            # string when the phrase is within the first 10 characters.
            print(page.title[max(i - 10, 0):])
        except ValueError:
            try:
                i = page.snippet.index('で' + query)
                print(page.snippet[max(i - 10, 0):])
            except Exception:
                # Best effort: a page without a usable snippet is skipped,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
コード例 #40
0
from search_engine import SearchEngine

# Interactive catalogue search: ask for a search field, then for a search
# string, and dispatch to the matching SearchEngine method until "Quit".
search = SearchEngine()
welcome = "Seach by \"Title\", \"Call Number\", \"Subjects\",  \"Other\" or \"Quit\": "

while True:
    # Normalise the choice once; a separate name keeps the builtin `input`
    # from being rebound.
    choice = raw_input(welcome).upper().strip()
    if choice == "QUIT":
        break

    search_string = raw_input("Please enter a search string: ")

    if choice == "TITLE":
        search.search_by_title(search_string)
    elif choice in ("CALL NUMBER", "CALLNUMBER"):
        search.search_by_call_number(search_string)
    elif choice in ("SUBJECTS", "SUBJECT"):
        search.search_by_subjects(search_string)
    elif choice == "OTHER":
        search.search_by_other(search_string)
    # Any other choice is ignored and the prompt is shown again.
コード例 #41
0
ファイル: app.py プロジェクト: katryo/task_search
def search_in_clueweb_with_expanded_query():
    """Expand the submitted query with action words and search ClueWeb.

    The action word and hint word come from the submitted form.  Every
    action word counted more than once yields one expanded query, which is
    sent to the Solr endpoint; the resulting pages are prepared around the
    action word and rendered together with their expanded query.
    """
    engine = SearchEngine()
    engine.action_word = request.form['action_word']
    engine.hint_word = request.form['hint_word']
    engine.find_related_action_words_with_google()
    engine.count_action_words()
    engine.sort_action_words_count()
    engine.pick_sorted_action_words_more_than_1_count()

    results = []
    for entry in engine.sorted_action_words_more_than_1_count:
        entry['expanded_query'] = engine.action_word + ' ' + engine.hint_word + ' ' + entry['word']
        # NOTE(review): the expanded query is concatenated into the URL
        # unescaped - confirm WebPage handles the encoding.
        solr_url = 'http://karen.dl.local:8983/solr/ClueWeb09ja/select?q=' + entry['expanded_query'] + '&wt=xml'
        xml_page = WebPage(solr_url)
        xml_page.fetch_xml()
        xml_page.pick_texts_to_result_pages()
        # Each query has its own result XML page.  Its contents are turned,
        # page by page, into WebPage objects held in result_pages, the
        # result pages belonging to that one query.
        for hit_page in xml_page.result_pages:
            hit_page.set_lines_from_texts()
            hit_page.set_line_nums_with_word(engine.action_word)
            hit_page.set_line_nums_around_action_word()
            hit_page.set_line_clusters_around_action_word()
        results.append({
            'pages': xml_page.result_pages,
            'expanded_query': entry['expanded_query'],
        })
    return render_template(
        'search_in_clueweb_with_expanded_query.tmpl', results=results)
コード例 #42
0
ファイル: server.py プロジェクト: stoyaneft/SearchEngine
def search():
    """Search for the 'keyword' request argument and render the results page.

    A missing argument is treated as the empty string.
    """
    keyword = request.args.get('keyword', '')
    pages_searched = SearchEngine().search(keyword)
    return render_template('results.html',
                           keyword=keyword,
                           pages_searched=pages_searched)
コード例 #43
0
ファイル: pattern_matcher.py プロジェクト: katryo/task_search
 def google_search(self):
     """Return the Google results for self.query, called with self.search_num."""
     return SearchEngine().google_search(self.query, self.search_num)
コード例 #44
0
"""

fetchnetapps = fetchNetApps(applications)
fetchnetapps.download_apps()
fetchnetapps.decompress_apps()



dbhandle = dbHandle(constants, functions, structures, applications)

apps_in_analysis_db = dbhandle.apps_analysis_is_done()




search_engine = SearchEngine(all_socket_api)

#count all socket APIs under applications directory
apps_dir = os.path.join(os.environ['PWD'],'applications')


#buggy Here
#for name in os.listdir(apps_dir):
for conf_name in applications:	
	#check whether it is the right application download based on configuration file
	# FIX ME:  not try to get 
	#for conf_name in applications:
	for name in os.listdir(apps_dir):
		path = os.path.join(apps_dir, name)
		#print name
		#print conf_name
コード例 #45
0
ファイル: server.py プロジェクト: antonpetkoff/SearchEngine
def do_something():
    """Run the 'query' request argument through the engine and render it.

    A missing argument is treated as the empty string.
    """
    user_query = request.args.get('query', '')
    found = SearchEngine().make_query(user_query)
    return render_template('result.html', data=found)