Example #1
0
class QuerySystem:
    """Small front end that reads queries, runs them and reports the results.

    All searching is delegated to a ``SearchEngine`` instance held in
    ``search_engine``.
    """

    # Class-level placeholder; __init__ gives every instance its own engine.
    search_engine = None

    def __init__(self):
        """Create the search engine used by this query system."""
        self.search_engine = SearchEngine()

    def get_queries_from_file(self, filename='query.txt'):
        """Read the queries stored in *filename*.

        Args:
            filename: path of the text file to read; defaults to
                ``'query.txt'`` in the current working directory.

        Returns:
            The file's lines as a list of strings, with their trailing
            newlines kept (``readlines`` semantics).

        Raises:
            OSError: if the file cannot be opened.
        """
        # Open the file the caller asked for rather than a fixed name.
        with open(filename, 'r') as query_file:
            return query_file.readlines()

    def search_query(self, query):
        """Run *query* (lower-cased) through the search engine.

        Args:
            query: the query string.

        Returns:
            A list of ``(document, score)`` tuples in the order produced by
            the engine, with every score converted to ``str``.
        """
        return [(document, str(score))
                for document, score in self.search_engine.search(query.lower())]

    def store_result(self, query, result):
        """Write the documents of *result* to ``output/<query>.txt``.

        Only the document names are written, one per line; scores are
        ignored.  The ``output`` directory is not created here.

        Args:
            query: query string, used verbatim as the file's base name.
            result: iterable of ``(document, score)`` pairs.
        """
        with open('output/' + query + '.txt', 'w') as output_file:
            for doc, _score in result:
                output_file.write(doc + '\n')

    def print_result(self, result):
        """Print every ``(document, score)`` pair as ``document : score``.

        Args:
            result: iterable of ``(document, score)`` pairs.
        """
        for document, score in result:
            print(document + " : " + str(score))
Example #2
0
 def test_Search_3(self):
     # Build an engine over the "docs" directory, with stop words loaded
     # into a separate-chaining hash table.
     stop_words = import_stopwords("stop_words.txt", HashTableSepchain())
     engine = SearchEngine("docs", stop_words)

     # Parse both queries first, then search, as two separate steps.
     adt_query = engine.parse_words(["ADT"])
     cs_query = engine.parse_words(["Computer Science"])

     # Expected (path, score) pairs, in the order the engine returns them.
     expected_adt = [('docs/data_structure.txt', 0.017277012046530055)]
     expected_cs = [
         ('docs/test.txt', 1.0),
         ('docs/information_retrieval.txt', 0.017241379310344827),
         ('docs/hash_table.txt', 0.009523809523809525),
     ]

     self.assertEqual(engine.search(adt_query), expected_adt)
     self.assertEqual(engine.search(cs_query), expected_cs)
Example #3
0
class SearchEngineCLI(Cmd):
    """Interactive shell around a ``SearchEngine``.

    Commands: ``load`` builds the engine from a file path, ``switch`` picks
    the ranking mode, ``search`` runs a query and ``exit`` leaves the loop.
    The docstrings of the ``do_*`` methods double as the shell's help text.
    """

    prompt = 'DATA1050> '
    intro = """Welcome to your search engine! Type help for more information.
    load FILE_PATH
    switch TF-IDF | PageRank | smart
    search keywords...
    exit
    """

    def __init__(self):
        """Start with no engine loaded and the ranking mode set to TF-IDF."""
        super().__init__()
        self.engine = None
        self.mode = 'TF-IDF'

    def do_load(self, inp):
        """ load {FILE_PATH} """
        # Any failure while building the engine is reported, not raised,
        # so a bad path does not kill the shell.
        try:
            print("loading " + inp + "...")
            self.engine = SearchEngine(inp)
            print("loading succeed\n")
        except Exception as err:
            print("Loading failed with ", err)

    def do_switch(self, inp):
        """ switch {TF-IDF | PageRank | smart} """
        valid_modes = ('TF-IDF', 'PageRank', 'smart')
        if inp in valid_modes:
            self.mode = inp
            print('change mode to ' + inp + '\n')
            return
        print('mode must be one of {TF-IDF, PageRank, smart}\n')

    def do_search(self, inp):
        """ search {keywords...} """
        # Searching is only possible once `load` has created an engine.
        if self.engine is None:
            print("load a data file first\n")
            return

        hits = self.engine.search(inp, self.mode)
        if len(hits) == 0:
            print("No results returned\n")
            return

        # Render the hits as a two-column table.
        table = pds.DataFrame(data=hits, columns=['documents', 'score'])
        print(table.to_string())
        print("")

    def do_exit(self, inp):
        """ exit the program """
        print("Bye!\n")
        # A true return value tells Cmd to stop its command loop.
        return True
Example #4
0
def ft_search(collection, query):
    """Run a full-text search in one collection and pair hits with documents.

    Args:
        collection: object whose ``id`` selects the collection to search.
        query: query passed unchanged to ``SearchEngine.search``.

    Returns:
        A list of ``(document, preview)`` tuples, one per scored hit, where
        ``document`` is the ``Document`` whose ``path`` equals the hit's
        path and ``preview`` is ``hit.highlights("content")``.

    Raises:
        Document.DoesNotExist: if a hit's path has no matching document
            (not handled here; the searcher is still closed).
    """
    se = SearchEngine(index_path=index_path, collection_id=collection.id)
    search_results = se.search(query, limit=100)

    # Grab the searcher first so it is always closed below, whatever fails.
    searcher = search_results["searcher"]

    results = []
    try:
        res = search_results["result"]

        # Only the hits that were actually scored are walked.
        for hit in res[0:res.scored_length()]:
            try:
                d = Document.objects.get(path=hit["path"])
            except Document.MultipleObjectsReturned:
                # Several rows share this path: use the one with the lowest id.
                d = Document.objects.filter(
                    path=hit["path"]).order_by('id').first()

            # NOTE(review): highlights are computed before the searcher is
            # closed; presumably they need it open -- confirm.
            preview = hit.highlights("content")
            results.append((d, preview))
    finally:
        # Close even when a lookup or the highlighting raises, so the
        # searcher is not leaked.
        searcher.close()

    return results
Example #5
0
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog that drives a ``SearchEngine``: build or load an index, query it
    and open result documents."""

    def __init__(self, parent=None):
        """Build the widgets, connect the signals and create the engine.

        Args:
            parent: optional parent widget, passed through to ``QDialog``.
        """
        QtGui.QDialog.__init__(self, parent)

        self.setupUi(self)

        # Connect buttons
        self.pushButtonLoad.clicked.connect(self.slotLoadIndex)
        self.pushButtonGenerate.clicked.connect(self.slotGenerateIndex)
        self.lineEditQuery.textChanged.connect(self.slotCheckQueryText)
        self.pushButtonSearch.clicked.connect(self.slotProcessQuery)
        self.treeWidgetResults.itemDoubleClicked.connect(self.slotShowDocument)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def _runBusy(self, title, task):
        """Run *task* with the UI locked, a busy title and a wait cursor.

        The clean-up is in ``finally`` so that an exception raised by *task*
        cannot leave the dialog disabled with the wait cursor stuck; the
        exception itself still propagates to the caller.

        Args:
            title: window title shown while *task* runs.
            task: zero-argument callable doing the actual work.
        """
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle(title)
        # Process pending events so the new title is applied before the
        # blocking work starts.
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            task()
        finally:
            # Restore cursor
            # NOTE(review): restore is called twice for a single
            # setOverrideCursor; kept as in the original -- confirm whether
            # the second call is needed.
            QtGui.QApplication.restoreOverrideCursor()
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)

    def slotShowDocument(self, item, column):
        """Open a ``DocumentWindow`` for the double-clicked result row.

        Args:
            item: the tree item that was double-clicked.
            column: the clicked column (unused).
        """
        # The two user roles on column 1 hold what slotProcessQuery stored:
        # the document text and the comma-joined query terms.
        documentWindow = DocumentWindow(self, item.text(0),
                                        item.data(1, QtCore.Qt.UserRole+1).toString(),
                                        unicode(item.data(1, QtCore.Qt.UserRole+2).toString()))
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the query from the line edit and list the results in the tree."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        joined_terms = ",".join(terms)
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Show only a 250-character preview; keep the full text as data.
            record.setText(1, "%s..." % docs[:250])
            record.setData(1, QtCore.Qt.UserRole+1, docs)
            record.setData(1, QtCore.Qt.UserRole+2, joined_terms)

    def slotCheckQueryText(self, text):
        """Enable the search button only when an index is loaded and *text*
        is non-empty.

        Args:
            text: current contents of the query line edit.
        """
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def slotGenerateIndex(self):
        """Generate the engine's indexes while showing a busy state."""
        self._runBusy("Generating indexes...", self.engine.create_index)

    def slotLoadIndex(self):
        """Load the engine's data while showing a busy state, then display
        how many documents and terms were loaded."""
        def load_and_report():
            # Load engine
            self.engine.load()

            # Fill UI with some info
            self.labelIndexInfo.setText("Loaded %d documents with %d terms." % (len(self.engine.documents),
                                                                                len(self.engine.index)))

        self._runBusy("Loading document cache and inverted index...", load_and_report)
Example #6
0
from Query import Query
import pickle

# ai_index_documents_length = eval(open('../indices/AIindex/documents_length.txt').read())
# AIindex = SearchEngine([open('../indices/AIindex/BLOCK262'), open('../indices/AIindex/BLOCK263'), open('../indices/AIindex/BLOCK264'), open('../indices/AIindex/BLOCK265')], ai_index_documents_length)
# document_frequencies = AIindex.document_frequencies()

# with open('../indices/AIindex/document_frequencies.txt', 'w') as file:
#     file.write(str(document_frequencies))

# Interactive ranking demo: load the precomputed statistics, open the
# ConcordiaAI index blocks and answer queries typed on stdin.

# NOTE(security): eval() executes whatever these files contain.  That is only
# acceptable because they are generated locally; if they hold plain literals,
# ast.literal_eval would be the safer choice -- confirm before switching.
with open('../indices/AIindex/document_frequencies.txt') as frequencies_file:
    document_frequencies = eval(frequencies_file.read())

with open('../indices/ConcordiaAI/documents_length.txt') as lengths_file:
    concordia_ai_documents_length = eval(lengths_file.read())

# The block files are handed to SearchEngine as open file objects, so they
# are intentionally not closed here.
ConcordiaAI = SearchEngine(
    [open('../indices/ConcordiaAI/BLOCK%d' % block_number)
     for block_number in range(119, 124)],
    concordia_ai_documents_length)

while True:
    try:
        k = int(input('Enter number of returns:'))
        query = Query(input('Enter query:'))
    except EOFError:
        # stdin is exhausted: every retry would raise again, so stop instead
        # of spinning forever.
        break
    except Exception as ex:
        # Report bad input and ask again.  print(ex) also copes with
        # exceptions raised without arguments, where ex.args[0] would fail.
        print(ex)
        continue
    # results is indexed 0..3 below: BM25, tf-idf, then the same two rankings
    # computed with the AITopics document frequencies.
    results = ConcordiaAI.search(query, document_frequencies, k)
    print('-----BM25 Ranking-----\n')
    print(results[0])
    print('-----BM25 Ranking with AITopics Document Frequencies-----\n')
    print(results[2])
    print('\n-----Tf-idf Ranking-----\n')
    print(results[1])
    print('\n-----Tf-idf Ranking with AITopics Document Frequencies-----\n')
    print(results[3])
Example #7
0
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog that drives a ``SearchEngine``: build or load an index, query it
    and open result documents."""

    def __init__(self, parent=None):
        """Build the widgets, connect the signals and create the engine.

        Args:
            parent: optional parent widget, passed through to ``QDialog``.
        """
        QtGui.QDialog.__init__(self, parent)

        self.setupUi(self)

        # Connect buttons
        self.pushButtonLoad.clicked.connect(self.slotLoadIndex)
        self.pushButtonGenerate.clicked.connect(self.slotGenerateIndex)
        self.lineEditQuery.textChanged.connect(self.slotCheckQueryText)
        self.pushButtonSearch.clicked.connect(self.slotProcessQuery)
        self.treeWidgetResults.itemDoubleClicked.connect(self.slotShowDocument)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def _runBusy(self, title, task):
        """Run *task* with the UI locked, a busy title and a wait cursor.

        The clean-up is in ``finally`` so that an exception raised by *task*
        cannot leave the dialog disabled with the wait cursor stuck; the
        exception itself still propagates to the caller.

        Args:
            title: window title shown while *task* runs.
            task: zero-argument callable doing the actual work.
        """
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle(title)
        # Process pending events so the new title is applied before the
        # blocking work starts.
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(
            QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            task()
        finally:
            # Restore cursor
            # NOTE(review): restore is called twice for a single
            # setOverrideCursor; kept as in the original -- confirm whether
            # the second call is needed.
            QtGui.QApplication.restoreOverrideCursor()
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)

    def slotShowDocument(self, item, column):
        """Open a ``DocumentWindow`` for the double-clicked result row.

        Args:
            item: the tree item that was double-clicked.
            column: the clicked column (unused).
        """
        # The two user roles on column 1 hold what slotProcessQuery stored:
        # the document text and the comma-joined query terms.
        documentWindow = DocumentWindow(
            self, item.text(0),
            item.data(1, QtCore.Qt.UserRole + 1).toString(),
            unicode(item.data(1, QtCore.Qt.UserRole + 2).toString()))
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the query from the line edit and list the results in the tree."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        joined_terms = ",".join(terms)
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Show only a 250-character preview; keep the full text as data.
            record.setText(1, "%s..." % docs[:250])
            record.setData(1, QtCore.Qt.UserRole + 1, docs)
            record.setData(1, QtCore.Qt.UserRole + 2, joined_terms)

    def slotCheckQueryText(self, text):
        """Enable the search button only when an index is loaded and *text*
        is non-empty.

        Args:
            text: current contents of the query line edit.
        """
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def slotGenerateIndex(self):
        """Generate the engine's indexes while showing a busy state."""
        self._runBusy("Generating indexes...", self.engine.create_index)

    def slotLoadIndex(self):
        """Load the engine's data while showing a busy state, then display
        how many documents and terms were loaded."""
        def load_and_report():
            # Load engine
            self.engine.load()

            # Fill UI with some info
            self.labelIndexInfo.setText(
                "Loaded %d documents with %d terms." %
                (len(self.engine.documents), len(self.engine.index)))

        self._runBusy("Loading document cache and inverted index...",
                      load_and_report)