class QuerySystem:
    """Read queries from a file, run them through a SearchEngine, and report results."""

    # Class-level placeholder; every instance overwrites it in __init__.
    search_engine = None

    def __init__(self):
        """Create the SearchEngine instance used by search_query()."""
        self.search_engine = SearchEngine()

    def get_queries_from_file(self, filename='query.txt'):
        """Return the lines of *filename* as a list, line endings included.

        Args:
            filename: Path of the query file to read. Defaults to 'query.txt'.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Open the path the caller asked for rather than a fixed file name.
        with open(filename, 'r') as query_file:
            return query_file.readlines()

    def search_query(self, query):
        """Search for the lower-cased *query*.

        Returns:
            A list of (document, score) tuples with the score converted to str.
        """
        return [
            (document, str(score))
            for document, score in self.search_engine.search(query.lower())
        ]

    def store_result(self, query, result):
        """Write the document of every (document, score) pair to output/<query>.txt.

        One document per line; scores are not written. The 'output'
        directory is not created here.
        """
        with open('output/' + query + '.txt', 'w') as output_file:
            for doc, _score in result:
                output_file.write(doc + '\n')

    def print_result(self, result):
        """Print each (document, score) pair as '<document> : <score>'."""
        for document, score in result:
            print(document + " : " + str(score))
def test_Search_3(self):
    # Build an engine over "docs" with the stop words loaded from
    # stop_words.txt into a separate-chaining hash table.
    stop_words = import_stopwords("stop_words.txt", HashTableSepchain())
    engine = SearchEngine("docs", stop_words)

    # Parse both queries up front, before any search is executed.
    adt_query = engine.parse_words(["ADT"])
    cs_query = engine.parse_words(["Computer Science"])

    # Expected (path, score) rankings; scores are compared exactly.
    expected_adt = [
        ('docs/data_structure.txt', 0.017277012046530055),
    ]
    expected_cs = [
        ('docs/test.txt', 1.0),
        ('docs/information_retrieval.txt', 0.017241379310344827),
        ('docs/hash_table.txt', 0.009523809523809525),
    ]

    self.assertEqual(engine.search(adt_query), expected_adt)
    self.assertEqual(engine.search(cs_query), expected_cs)
class SearchEngineCLI(Cmd):
    """Interactive shell for loading a data file and searching it.

    The ``do_*`` docstrings are what the built-in ``help`` command prints,
    so they are kept as short usage strings.
    """

    prompt = 'DATA1050> '
    # Banner shown once when the command loop starts.
    intro = (
        """Welcome to your search engine! Type help for more information. """
        """load FILE_PATH switch TF-IDF | PageRank | smart """
        """search keywords... exit """
    )

    def __init__(self):
        """ Initialize Search Engine CLI """
        super().__init__()
        # No engine until `load` succeeds; ranking mode starts as TF-IDF.
        self.engine = None
        self.mode = 'TF-IDF'

    def do_load(self, inp):
        """ load {FILE_PATH} """
        try:
            print(f"loading {inp}...")
            self.engine = SearchEngine(inp)
            print("loading succeed\n")
        except Exception as err:
            # Any failure is reported and the shell keeps running.
            print("Loading failed with ", err)

    def do_switch(self, inp):
        """ switch {TF-IDF | PageRank | smart} """
        if inp in {'TF-IDF', 'PageRank', 'smart'}:
            self.mode = inp
            print(f'change mode to {inp}\n')
        else:
            print('mode must be one of {TF-IDF, PageRank, smart}\n')

    def do_search(self, inp):
        """ search {keywords...} """
        if self.engine is None:
            print("load a data file first\n")
            return

        hits = self.engine.search(inp, self.mode)
        if len(hits) == 0:
            print("No results returned\n")
            return

        # Render the (document, score) rows as a text table.
        table = pds.DataFrame(columns=['documents', 'score'], data=hits)
        print(table.to_string())
        print("")

    def do_exit(self, inp):
        """ exit the program """
        print("Bye!\n")
        # A true return value tells Cmd.cmdloop() to stop.
        return True
def ft_search(collection, query):
    """Run a full-text search within *collection* and pair hits with Documents.

    Args:
        collection: Object whose ``id`` selects the collection to search.
        query: Query passed through to ``SearchEngine.search``.

    Returns:
        A list of ``(document, preview)`` tuples, where ``preview`` is the
        highlighted excerpt of the hit's "content" field. At most 100 hits
        are requested from the engine.

    Raises:
        Document.DoesNotExist: If a hit's path has no matching Document
            (propagated from ``Document.objects.get``).
    """
    se = SearchEngine(index_path=index_path, collection_id=collection.id)
    search_results = se.search(query, limit=100)
    res = search_results["result"]
    searcher = search_results["searcher"]

    results = []
    try:
        # Only the scored portion of the result set is walked.
        for hit in res[0:res.scored_length()]:
            try:
                d = Document.objects.get(path=hit["path"])
            except Document.MultipleObjectsReturned:
                # Several rows share this path: take the one with the lowest id.
                d = Document.objects.filter(
                    path=hit["path"]).order_by('id').first()
            preview = hit.highlights("content")
            results.append((d, preview))
    finally:
        # Highlights need the searcher open, so close it only after the loop,
        # and close it even if a lookup or highlight call raises.
        searcher.close()
    return results
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog for generating or loading an index and running queries against it."""

    def __init__(self, parent=None):
        """Build the UI, wire the widget signals to slots, and create the engine."""
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)

        # Connect buttons
        self.pushButtonLoad.clicked.connect(self.slotLoadIndex)
        self.pushButtonGenerate.clicked.connect(self.slotGenerateIndex)
        self.lineEditQuery.textChanged.connect(self.slotCheckQueryText)
        self.pushButtonSearch.clicked.connect(self.slotProcessQuery)
        self.treeWidgetResults.itemDoubleClicked.connect(self.slotShowDocument)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def slotShowDocument(self, item, column):
        """Open a DocumentWindow for the double-clicked result row.

        The text and the comma-joined terms are read back from the item data
        stored by slotProcessQuery (UserRole+1 and UserRole+2 of column 1).
        """
        documentWindow = DocumentWindow(
            self,
            item.text(0),
            item.data(1, QtCore.Qt.UserRole+1).toString(),
            unicode(item.data(1, QtCore.Qt.UserRole+2).toString()))
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the query in the line edit and list the results in the tree widget."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Column 1 shows only the first 250 characters as a preview.
            record.setText(1, "%s..." % docs[:250])
            # Full text and query terms ride along for slotShowDocument.
            record.setData(1, QtCore.Qt.UserRole+1, docs)
            record.setData(1, QtCore.Qt.UserRole+2, ",".join(terms))

    def slotCheckQueryText(self, text):
        """Enable the search button only when the engine is loaded and text is non-empty."""
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def slotGenerateIndex(self):
        """Create the index while showing a busy state (disabled box, wait cursor)."""
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle("Generating indexes...")
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            self.engine.create_index()
        finally:
            # Always undo the busy state, even when indexing raises; otherwise
            # the dialog is left disabled with a wait cursor.
            # NOTE(review): restore is called twice although only one override
            # cursor is pushed here -- kept as-is; confirm whether the engine
            # pushes one of its own.
            QtGui.QApplication.restoreOverrideCursor()
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)

    def slotLoadIndex(self):
        """Load the engine data while showing a busy state, then report its size."""
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle("Loading document cache and inverted index...")
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            # Load engine
            self.engine.load()

            # Fill UI with some info
            self.labelIndexInfo.setText("Loaded %d documents with %d terms." % (len(self.engine.documents), len(self.engine.index)))
        finally:
            # Always undo the busy state, even when loading raises.
            # NOTE(review): double restore kept from the original -- confirm.
            QtGui.QApplication.restoreOverrideCursor()
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)
from Query import Query
import pickle


def _read_literal(path):
    """Evaluate the Python expression stored in the file at *path* and return it.

    The file is closed before returning.
    """
    with open(path) as handle:
        # SECURITY: eval() executes whatever the file contains. This is only
        # acceptable while these index files are produced locally and trusted;
        # if they hold plain literals, ast.literal_eval would be the safer
        # choice -- TODO confirm the file contents before switching.
        return eval(handle.read())


# How ../indices/AIindex/document_frequencies.txt was produced (kept for reference):
# ai_index_documents_length = eval(open('../indices/AIindex/documents_length.txt').read())
# AIindex = SearchEngine([open('../indices/AIindex/BLOCK262'), open('../indices/AIindex/BLOCK263'), open('../indices/AIindex/BLOCK264'), open('../indices/AIindex/BLOCK265')], ai_index_documents_length)
# document_frequencies = AIindex.document_frequencies()
# with open('../indices/AIindex/document_frequencies.txt', 'w') as file:
#     file.write(str(document_frequencies))

document_frequencies = _read_literal('../indices/AIindex/document_frequencies.txt')

concordia_ai_documents_length = _read_literal('../indices/ConcordiaAI/documents_length.txt')
# The block files are handed to the engine as open file objects and are
# intentionally left open for the lifetime of the script.
ConcordiaAI = SearchEngine(
    [
        open('../indices/ConcordiaAI/BLOCK119'),
        open('../indices/ConcordiaAI/BLOCK120'),
        open('../indices/ConcordiaAI/BLOCK121'),
        open('../indices/ConcordiaAI/BLOCK122'),
        open('../indices/ConcordiaAI/BLOCK123'),
    ],
    concordia_ai_documents_length,
)

# Interactive loop: ask for a result count and a query, then print the four
# rankings returned by the engine.
while True:
    try:
        k = int(input('Enter number of returns:'))
        query = Query(input('Enter query:'))
    except EOFError:
        # stdin is closed: retrying would loop forever, so stop instead.
        break
    except Exception as ex:
        # Show the message and prompt again; fall back to the exception
        # itself when it carries no arguments.
        print(ex.args[0] if ex.args else ex)
        continue

    results = ConcordiaAI.search(query, document_frequencies, k)

    print('-----BM25 Ranking-----\n')
    print(results[0])
    print('-----BM25 Ranking with AITopics Document Frequencies-----\n')
    print(results[2])
    print('\n-----Tf-idf Ranking-----\n')
    print(results[1])
    print('\n-----Tf-idf Ranking with AITopics Document Frequencies-----\n')
    print(results[3])
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog for generating or loading an index and running queries against it."""

    def __init__(self, parent=None):
        """Build the UI, wire the widget signals to slots, and create the engine."""
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)

        # Connect buttons
        self.pushButtonLoad.clicked.connect(self.slotLoadIndex)
        self.pushButtonGenerate.clicked.connect(self.slotGenerateIndex)
        self.lineEditQuery.textChanged.connect(self.slotCheckQueryText)
        self.pushButtonSearch.clicked.connect(self.slotProcessQuery)
        self.treeWidgetResults.itemDoubleClicked.connect(
            self.slotShowDocument)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def _runBusy(self, title, task):
        """Run *task* with the dialog in its busy state, then always restore it.

        The group box is disabled, *title* is shown as the window title and a
        wait cursor is set while *task* (a zero-argument callable) runs.
        Restoration happens in ``finally`` so an exception from *task* cannot
        leave the dialog disabled; the exception still propagates.
        """
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle(title)
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(
            QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            task()
        finally:
            # Restore cursor
            # NOTE(review): restore is called twice although only one override
            # cursor is pushed here -- kept as-is; confirm whether the engine
            # pushes one of its own.
            QtGui.QApplication.restoreOverrideCursor()
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)

    def _loadAndReport(self):
        """Load the engine data and show document/term counts in the info label."""
        # Load engine
        self.engine.load()

        # Fill UI with some info
        self.labelIndexInfo.setText(
            "Loaded %d documents with %d terms." %
            (len(self.engine.documents), len(self.engine.index)))

    def slotShowDocument(self, item, column):
        """Open a DocumentWindow for the double-clicked result row.

        The text and the comma-joined terms are read back from the item data
        stored by slotProcessQuery (UserRole + 1 and UserRole + 2 of column 1).
        """
        documentWindow = DocumentWindow(
            self, item.text(0),
            item.data(1, QtCore.Qt.UserRole + 1).toString(),
            unicode(item.data(1, QtCore.Qt.UserRole + 2).toString()))
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the query in the line edit and list the results in the tree widget."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Column 1 shows only the first 250 characters as a preview.
            record.setText(1, "%s..." % docs[:250])
            # Full text and query terms ride along for slotShowDocument.
            record.setData(1, QtCore.Qt.UserRole + 1, docs)
            record.setData(1, QtCore.Qt.UserRole + 2, ",".join(terms))

    def slotCheckQueryText(self, text):
        """Enable the search button only when the engine is loaded and text is non-empty."""
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def slotGenerateIndex(self):
        """Create the index while the dialog shows its busy state."""
        self._runBusy("Generating indexes...", self.engine.create_index)

    def slotLoadIndex(self):
        """Load the engine data while the dialog shows its busy state."""
        self._runBusy("Loading document cache and inverted index...",
                      self._loadAndReport)