Beispiel #1
0
    def __init__(self):
        """Set the file and folder names used for a 'true_or_false_question' exercise.

        Only plain attributes are assigned here, plus one ``SearchEngine``
        instance that is stored on ``self.searchEngine``.
        """
        base = 'true_or_false_question'
        program = 'program.py'
        answer_prefix = 'answer_'

        self.theFolderName = base
        #self.foldersNames = ['qom_questions_transformer','true_or_false_question']
        self.filesNames = [base + '.tex', program, 'util_make_random_versions.py']
        self.questionpdf = base + '.pdf'
        self.questionpng = base + '.png'
        self.linesNamepdf = [answer_prefix, '_true.pdf', '_false.pdf']
        self.linesNamepng = [answer_prefix, '_true.png', '_false.png']
        self.questionFolder = 'question'
        self.versionFolder = 'version_'
        self.directory = self.filePath = ''
        self.searchEngine = SearchEngine()

        # difference finder
        self.pyFilesNames = ['full_program.py', 'answers_program.py']
        self.pyProgram = program
        self.pyPrograms = ['program', '.py']
        self.texFilesNames = [base + '.tex']
        self.texAnswerNames = [answer_prefix, '_false.tex', '_true.tex']

        # Two distinct, initially empty lists.
        self.folders, self.files = [], []
Beispiel #2
0
class TestSearchEngine(TestCase):
    # Compares SearchEngine.execute_query against fixed expected rankings
    # for the 'sum' and 'prod' combine settings.

    def setUp(self):
        # A new engine is built before every test method.
        self.se = SearchEngine(
            1000, 'word2vec.glove.6B.300d.txt', 'sum', forcebin=True)

    def test_execute_query_Summation(self):
        self.se.combine = 'sum'

        expected = [('booze', 23), ('laws', 228), ('lows', 578),
                    ('dues', 621), ("law's", 766)]
        actual = self.se.execute_query('lose', 'alcohol', False, False)
        self.assertEqual(actual, expected)

        expected = [("samurai's", 1), ('samurais', 1),
                    ('warriors', 100000000), ('somers', 100000002),
                    ('dragon', 100000002)]
        actual = self.se.execute_query('summarize', 'warrior', False, False)
        self.assertEqual(actual, expected)

    def test_execute_query_Multiplication(self):
        self.se.combine = 'prod'

        expected = [('booze', 114), ('laws', 6501), ('dues', 9120),
                    ("law's", 23552), ('lows', 38304)]
        actual = self.se.execute_query('lose', 'alcohol', False, False)
        self.assertEqual(actual, expected)

        expected = [('samurais', 1), ("samurai's", 2),
                    ('warriors', 100000000), ('somers', 100000002),
                    ('dragon', 100000002)]
        actual = self.se.execute_query('summarize', 'warrior', False, False)
        self.assertEqual(actual, expected)

    def tearDown(self):
        # Nothing to release after a test.
        pass
Beispiel #3
0
 def test_Search_3(self):
     # Engine over the "docs" folder, with stop words loaded into a
     # separate-chaining hash table.
     stop_words = import_stopwords("stop_words.txt", HashTableSepchain())
     s = SearchEngine("docs", stop_words)
     query1 = s.parse_words(["ADT"])
     query2 = s.parse_words(["Computer Science"])

     expected_adt = [('docs/data_structure.txt', 0.017277012046530055)]
     self.assertEqual(s.search(query1), expected_adt)

     expected_cs = [
         ('docs/test.txt', 1.0),
         ('docs/information_retrieval.txt', 0.017241379310344827),
         ('docs/hash_table.txt', 0.009523809523809525),
     ]
     self.assertEqual(s.search(query2), expected_cs)
Beispiel #4
0
 def do_load(self, inp):
     """ load {FILE_PATH} """
     try:
         print(f"loading {inp}...")
         # Build the engine first; self.engine is only replaced on success.
         loaded_engine = SearchEngine(inp)
         self.engine = loaded_engine
         print("loading succeed\n")
     except Exception as err:
         # Any failure is reported to the user instead of propagating.
         print("Loading failed with ", err)
Beispiel #5
0
def meta(request):
    """Render 'search/meta.html' with ``SearchEngine.metaSearch`` results for the ``q`` GET parameter.

    Queries of fewer than two characters are not searched and yield an
    empty result list.
    """
    se = SearchEngine()
    query = request.GET.get('q', '')
    result = se.metaSearch(query) if len(query) > 1 else []
    context = {'query': query, 'result': result, 'len': len(result)}
    return render(request, 'search/meta.html', context)
Beispiel #6
0
def keywords(request):
    """Render 'search/keywords.html' with ``SearchEngine.tfidfSearch`` results for the ``q`` GET parameter.

    Queries of fewer than two characters are not searched and yield an
    empty result list.
    """
    se = SearchEngine()
    query = request.GET.get('q', '')
    result = se.tfidfSearch(query) if len(query) > 1 else []
    context = {'query': query, 'result': result, 'len': len(result)}
    return render(request, 'search/keywords.html', context)
def processCommand():
    """Run the interactive search menu until the user stops.

    Each round prints the menu, reads an option (either its name, e.g.
    ``Title``, or its menu number, e.g. ``1``), asks for the search term,
    runs the matching ``SearchEngine`` search and passes the result to
    ``display_main``.  An unknown option skips the search.  The loop
    repeats only when the user answers ``Yes`` to the continue prompt;
    any other answer returns.

    Implemented as a loop rather than by calling itself, so a long session
    cannot exhaust the recursion limit.
    """
    # Option name -> (prompt text, name of the SearchEngine method to call).
    options = {
        "Title": ("Enter the title to be searched", "search_by_title"),
        "Number": ("Enter the number to be searched ", "search_by_call_number"),
        "Subjects": ("Enter the subject to be searched", "search_by_subjects"),
        "Other": ("Enter the data to be searched ", "search_by_other"),
    }
    # The menu shows numbered entries, so the numbers are accepted too.
    by_number = {"1": "Title", "2": "Number", "3": "Subjects", "4": "Other"}

    while True:
        print("Enter the option you want to search by")
        print("1.Title")
        print("2.Number")
        print("3.Subjects")
        print("4.Other")
        inp = input()
        option = by_number.get(inp, inp)

        if option in options:
            prompt, method_name = options[option]
            term = input(prompt)
            se = SearchEngine()
            search_result = getattr(se, method_name)(term)
            display_main(search_result)

        yesno = input("Do you wish to continue? Yes or No")
        if yesno != "Yes":
            return
Beispiel #8
0
    def __init__(self, parent=None):
        """Build the dialog UI, connect its widgets to the slots and create the engine."""
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)

        # (signal, slot) pairs, connected in this order.
        connections = (
            (self.pushButtonLoad.clicked, self.slotLoadIndex),
            (self.pushButtonGenerate.clicked, self.slotGenerateIndex),
            (self.lineEditQuery.textChanged, self.slotCheckQueryText),
            (self.pushButtonSearch.clicked, self.slotProcessQuery),
            (self.treeWidgetResults.itemDoubleClicked, self.slotShowDocument),
        )
        for signal, slot in connections:
            signal.connect(slot)

        # Search engine instance used by the slots.
        self.engine = SearchEngine(client=True)
Beispiel #9
0
    def clear(self):
        """
        Reset the database to an empty state.

        Replaces ``self.engine`` with a new ``SearchEngine``, replaces
        ``self.entities`` with an empty dict and then calls
        ``self.updateDatabase()``.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # The engine is constructed before either attribute is rebound.
        self.engine, self.entities = SearchEngine(), {}
        self.updateDatabase()
Beispiel #10
0
class QuerySystem:
    """Runs queries against a ``SearchEngine`` and stores or prints the ranked results."""

    # Class-level placeholder; every instance gets its own engine in __init__.
    search_engine = None

    def __init__(self):
        self.search_engine = SearchEngine()

    def get_queries_from_file(self, filename='query.txt'):
        """Return the lines of *filename* as a list (line endings are kept).

        The *filename* argument is honoured; the default still reads
        'query.txt' from the current directory.
        """
        with open(filename, 'r') as query_file:
            return query_file.readlines()

    def search_query(self, query):
        """Search for the lower-cased *query*; return ``(document, str(score))`` pairs."""
        hits = self.search_engine.search(query.lower())
        return [(document, str(score)) for document, score in hits]

    def store_result(self, query, result):
        """Write the document of each ``(document, score)`` pair, one per line, to 'output/<query>.txt'."""
        with open('output/' + query + '.txt', 'w') as output_file:
            output_file.writelines(doc + '\n' for doc, _score in result)

    def print_result(self, result):
        """Print each ``(document, score)`` pair as 'document : score'."""
        for document, score in result:
            print(document + " : " + str(score))
Beispiel #11
0
def clip_poly(jsonpath, task_id, search_time, res_path):
    """Collect the data paths found for every grid cell of the configured area and clip them per time key.

    The arguments are handed to ``set_conf``; the remaining settings are
    then read from ``conf``.  Paths returned by the search are grouped by
    the keys of the search result, and each group is passed to
    ``clip_dataset_list_groupby_time``.  Finally the elapsed time, the task
    id and the number of unit blocks are printed.
    """
    start = time()
    set_conf(jsonpath, task_id, search_time, res_path)
    # os.mkdir(conf.sRslPath)

    pWGSLT, pWGSRB = BaseProcesses.read_json_area(conf.jsonpath)
    unitblock_list = get_unitblock_list(pWGSLT, pWGSRB)

    grid_dic = {}
    for unitblock in unitblock_list:
        lsGridcode = GridCalculate.GridCodeToGridlist_iPCSType(
            unitblock['sGridCodeLT'],
            unitblock['sGridCodeRB'],
            unitblock['iPCSType'])
        for sGridCode in lsGridcode:
            lbds10kmIn = SearchEngine.SearchByRgDttmDtpd(
                sGridCode, conf.sDatahomePath, conf.search_time,
                conf.iDataProduct, conf.iCloulLevel)
            for lbd_time in lbds10kmIn:
                # Append this cell's paths to the group for its time key.
                paths = [lbd.sPathName for lbd in lbds10kmIn[lbd_time]]
                grid_dic.setdefault(lbd_time, []).extend(paths)

    for lbd_time, grouped_paths in grid_dic.items():
        clip_dataset_list_groupby_time(grouped_paths, lbd_time)
    grid_dic = None
    end = time()
    print("任务{1}耗时{0},涉及{2}个条带".format(end-start, task_id, len(unitblock_list)))
def main():
    """Menu loop: run the chosen search, display every hit, then ask whether to continue."""
    se = SearchEngine()
    results = []

    # Menu choice -> SearchEngine method name.  The method is looked up only
    # when its choice is selected.
    searches = {
        "1": "search_by_call_number",
        "2": "search_by_title",
        "3": "search_by_subject",
        "4": "search_by_other",
    }

    ch = "y"
    while ch == "y":
        print("Menu\n1.Search by call number\n2.Search by Title\n3.Search by subject\n4.Search by other\n5.exit\n")
        print("Enter search number :")
        choice = input()
        if choice in searches:
            string = input("Enter String :")
            results = getattr(se, searches[choice])(string)
            for hit in results:
                hit.display()
            # Empty the returned list in place once it has been shown.
            del results[:]
        elif choice == "5":
            print("Exited")
            break
        else:
            print("Invalid choice ")
        ch = input("Do you want to continue ?(y/n)")
Beispiel #13
0
def main(argv):
    """Print one answer per question of a question file, using one article.

    ``argv[1]`` is the article path and ``argv[2]`` the question-file path.
    When they cannot be read from ``argv``, an error line is printed and
    the sample paths 'data/a1.txt' and 'ques1.txt' are used instead.

    For every question the sentences ranked by ``SearchEngine.rankByIndri``
    are cut to ``RETRIEVAL_LIMIT`` entries and tried in order; the first
    answer other than '/' is printed (an empty line when none is found).
    Extra diagnostics are printed when the module-level ``verbose`` is set.
    """
    try:
        inputFilePath = argv[1]
        questionFilePath = argv[2]
    except (IndexError, TypeError):
        # Only "arguments missing" triggers the fallback; unrelated errors
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        print "ERROR: Unable to read input argument!!"
        inputFilePath = 'data/a1.txt'
        questionFilePath = 'ques1.txt'
        # exit(1)

    article = Article(inputFilePath)

    # Get questions, queries, expected_types
    questions = readQuestions(questionFilePath)
    expected_types = at_detect(questionFilePath)
    # queries = [remove_stop_words_stem(question) for question in questions]

    assert len(expected_types) == len(questions)
    # assert len(expected_types) == len(queries)

    # Init classes
    se = SearchEngine(article)

    for i in range(len(questions)):
        if verbose:
            print '-' * 10

        result = se.rankByIndri(questions[i])
        topSentence = se.returnTopKResult(result, RETRIEVAL_LIMIT)

        finalAnswer = ''
        # Try the top-ranked sentences in order; '/' means "no answer here".
        for sentence in topSentence:
            if verbose:
                print 'expected_types: %s\n sentence:%s' % (expected_types[i],
                                                            sentence)

            answer = get_answer(questions[i], expected_types[i], sentence)
            if answer != '/':
                finalAnswer = answer
                break
        if verbose:
            print '==finalAnswer==  ', finalAnswer
        else:
            print finalAnswer
Beispiel #14
0
def result():
    """Handle a POSTed search form and render 'result.html' with up to 15 results.

    The query is read from the ``query`` form field and passed, together
    with the module-level ``matrix``, to ``SearchEngine``.  The elapsed
    time in seconds is printed.  For any other request method nothing is
    returned.
    """
    if request.method == 'POST':
        started = time.time()
        query = request.form["query"]
        res = SearchEngine(query, matrix)
        res = getResults(res[0:15])
        # Elapsed seconds = end - start, so the printed value is non-negative.
        print(time.time() - started)
        return render_template("result.html", result=res, query=query)
Beispiel #15
0
    def __init__(self, file = ".database.pkl"):
        """
        Create a database object, loading stored state when possible.

        Parameters
        ----------
        file: String
            path to database file (.pkl)

        Returns
        -------
        None
        """
        self.file = file  # database file
        try:
            self.engine, self.entities = self.getDatabase()
        except EOFError:
            # getDatabase() hit end-of-file: start with a new engine and no entities.
            self.engine, self.entities = SearchEngine(), {}
        else:
            print("Loaded from database", flush = True)
Beispiel #16
0
def ft_search(collection, query):
    """Full-text search inside *collection*; return ``(Document, preview)`` pairs.

    The search is limited to 100 hits and only the scored hits are used.
    Each hit is resolved to the ``Document`` with the same path; when
    several documents share a path, the one with the lowest id is taken.
    The preview is the hit's highlighted "content" field.

    The searcher returned by the engine is always closed, even when
    resolving a hit raises.
    """
    se = SearchEngine(index_path=index_path, collection_id=collection.id)
    search_results = se.search(query, limit=100)

    res = search_results["result"]
    searcher = search_results["searcher"]

    results = []
    try:
        for hit in res[0:res.scored_length()]:
            try:
                d = Document.objects.get(path=hit["path"])
            except Document.MultipleObjectsReturned:
                d = Document.objects.filter(
                    path=hit["path"]).order_by('id').first()

            preview = hit.highlights("content")
            results.append((d, preview))
    finally:
        # Release the searcher on every exit path.
        searcher.close()

    return results
Beispiel #17
0
class SearchEngineCLI(Cmd):
    # Command shell around a SearchEngine.  The docstrings of the do_*
    # methods double as the text shown by the shell's `help <command>`,
    # so they are part of the user interface.
    prompt = 'DATA1050> '
    intro = """Welcome to your search engine! Type help for more information.
    load FILE_PATH
    switch TF-IDF | PageRank | smart
    search keywords...
    exit
    """

    def __init__(self):
        """ Initialize Search Engine CLI """
        super().__init__()
        # No engine until `load` succeeds; ranking mode starts as TF-IDF.
        self.engine = None
        self.mode = 'TF-IDF'

    def do_load(self, inp):
        """ load {FILE_PATH} """
        try:
            print(f"loading {inp}...")
            loaded_engine = SearchEngine(inp)
            self.engine = loaded_engine
            print("loading succeed\n")
        except Exception as err:
            # Report the failure and keep the shell running.
            print("Loading failed with ", err)

    def do_switch(self, inp):
        """ switch {TF-IDF | PageRank | smart} """
        if inp in {'TF-IDF', 'PageRank', 'smart'}:
            self.mode = inp
            print(f'change mode to {inp}\n')
            return
        print('mode must be one of {TF-IDF, PageRank, smart}\n')

    def do_search(self, inp):
        """ search {keywords...} """
        if self.engine is None:
            print("load a data file first\n")
            return

        results = self.engine.search(inp, self.mode)
        if len(results) == 0:
            print("No results returned\n")
            return

        table = pds.DataFrame(columns=['documents', 'score'], data=results)
        print(table.to_string())
        print("")

    def do_exit(self, inp):
        """ exit the program """
        print("Bye!\n")
        # A true return value ends the command loop.
        return True
Beispiel #18
0
    def __init__(self, parent=None):
        """Build the dialog UI, connect its widgets to the slots and create the engine."""
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)

        # (signal, slot) pairs, connected in this order.
        connections = (
            (self.pushButtonLoad.clicked, self.slotLoadIndex),
            (self.pushButtonGenerate.clicked, self.slotGenerateIndex),
            (self.lineEditQuery.textChanged, self.slotCheckQueryText),
            (self.pushButtonSearch.clicked, self.slotProcessQuery),
            (self.treeWidgetResults.itemDoubleClicked, self.slotShowDocument),
        )
        for signal, slot in connections:
            signal.connect(slot)

        # Search engine instance used by the slots.
        self.engine = SearchEngine(client=True)
Beispiel #19
0
def DealIn10km(sGridCode, sDateTime, iDataProduct, iModelId, iCloulLevel,
               sDatahomePath, sRslPath):
    '''Process a single 10 km grid cell for the given grid code, date and data type.

    1. Search for the data to process in this cell, by grid code, date and
       data type (the "Lbds" list).
    2. Stitch that data into the result for the cell (the "bds" result).

    When the search finds nothing, a ``basic_data_struct`` whose
    ``sPathName`` is "0" is returned instead.
    '''
    lbds10kmIn = SearchEngine.SearchByRgDttmDtpd(
        sGridCode, sDatahomePath, sDateTime, iDataProduct, iCloulLevel)

    if len(lbds10kmIn) < 1:
        # Nothing found: return the placeholder structure.
        bdsRlt = basic_data_struct()
        bdsRlt.sPathName = "0"
        return bdsRlt

    return Stitching.StichingIn10km(lbds10kmIn, iDataProduct, iModelId,
                                    sRslPath)
Beispiel #20
0
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog for loading or generating a search index and running queries against it."""

    def __init__(self, parent=None):
        QtGui.QDialog.__init__(self, parent)

        self.setupUi(self)

        # Connect buttons
        self.pushButtonLoad.clicked.connect(self.slotLoadIndex)
        self.pushButtonGenerate.clicked.connect(self.slotGenerateIndex)
        self.lineEditQuery.textChanged.connect(self.slotCheckQueryText)
        self.pushButtonSearch.clicked.connect(self.slotProcessQuery)
        self.treeWidgetResults.itemDoubleClicked.connect(self.slotShowDocument)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def slotShowDocument(self, item, column):
        """Open a DocumentWindow for the double-clicked result row."""
        # Column 1 carries the document text (UserRole+1) and the
        # comma-joined query terms (UserRole+2), as stored by slotProcessQuery.
        documentWindow = DocumentWindow(self, item.text(0),
                                        item.data(1, QtCore.Qt.UserRole+1).toString(),
                                        unicode(item.data(1, QtCore.Qt.UserRole+2).toString()))
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the query from the line edit and list one row per matching document."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Show only the first 250 characters as a preview.
            record.setText(1, "%s..." % docs[:250])
            record.setData(1, QtCore.Qt.UserRole+1, docs)
            record.setData(1, QtCore.Qt.UserRole+2, ",".join(terms))

    def slotCheckQueryText(self, text):
        """Enable the search button only when the engine is loaded and *text* is non-empty."""
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def _runBusy(self, title, action):
        """Call *action* with the controls disabled, *title* shown and a wait cursor.

        The cursor, window title and controls are restored even when
        *action* raises, so a failing engine call cannot leave the dialog
        disabled.  The override cursor is restored once, matching the
        single setOverrideCursor call.
        """
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle(title)
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))
        try:
            action()
        finally:
            QtGui.QApplication.restoreOverrideCursor()
            self.setWindowTitle("Search Engine")
            self.groupBox.setEnabled(True)

    def slotGenerateIndex(self):
        """Create the index while the dialog shows a busy state."""
        self._runBusy("Generating indexes...", self.engine.create_index)

    def slotLoadIndex(self):
        """Load the engine data, then show how many documents and terms were loaded."""
        def load_and_report():
            # Load engine
            self.engine.load()

            # Fill UI with some info
            self.labelIndexInfo.setText("Loaded %d documents with %d terms." % (len(self.engine.documents),
                                                                                len(self.engine.index)))

        self._runBusy("Loading document cache and inverted index...",
                      load_and_report)
Beispiel #21
0
'''
Created on Mar 21, 2016

@author: robert
'''
from SearchEngine import SearchEngine

if __name__ == '__main__':

    engine = SearchEngine()

    # Ask for the number of results until the answer is blank (nothing is
    # set on the engine) or consists of digits only.
    while True:
        num_results = input("Please enter the number of entries you like to retrieve for each query: [10] ")
        if not num_results:
            break
        if num_results.isdigit():
            engine.setNumResults(int(num_results))
            break
        print("Invalid input")

    # Ask for a cache file until one loads; a blank answer counts as "y".
    # NOTE(review): any other answer just repeats the question - no other
    # branch is visible here.
    while True:
        isLoadFromCache = input("Do you want to start search engine from cached data file? [Y/n] ")
        if not isLoadFromCache or isLoadFromCache.lower() == "y":
            try:
                cache_file_path = input("Please enter full path to the cached data file:")
                engine.initFromCache(cache_file_path)
                break
            except Exception as e:
                print(e)
Beispiel #22
0

if __name__ == "__main__":
    start = time()
    set_conf()
    os.mkdir(conf.sRslPath)

    pWGSLT, pWGSRB = BaseProcesses.read_json_area(conf.jsonpath)
    unitblock_list = get_unitblock_list(pWGSLT, pWGSRB)

    # Time key -> list of opened datasets, gathered over all grid cells.
    grid_dic = {}
    for unitblock in unitblock_list:
        lsGridcode = GridCalculate.GridCodeToGridlist_iPCSType(
            unitblock['sGridCodeLT'],
            unitblock['sGridCodeRB'],
            unitblock['iPCSType'])
        for sGridCode in lsGridcode:
            lbds10kmIn = SearchEngine.SearchByRgDttmDtpd(
                sGridCode, conf.sDatahomePath, conf.search_time,
                conf.iDataProduct, conf.iCloulLevel)
            for lbd_time in lbds10kmIn:
                opened = [
                    gdal.Open(lbd.sPathName) for lbd in lbds10kmIn[lbd_time]
                ]
                grid_dic.setdefault(lbd_time, []).extend(opened)

    for lbd_time in grid_dic:
        clip_dataset_list_groupby_time(grid_dic[lbd_time], lbd_time)
Beispiel #23
0
    Authors: David Greisler (s0531301), Paul Kitt (s0528516), Marc Lehmann(s0524790)
    
"""

from SearchEngine import SearchEngine

seedURLs = [ "http://mysql12.f4.htw-berlin.de/crawl/d01.html",
             "http://mysql12.f4.htw-berlin.de/crawl/d06.html",
             "http://mysql12.f4.htw-berlin.de/crawl/d08.html" ]

stopWords = [ 'd01', 'd02', 'd03', 'd04', 'd05', 'd06', 'd07', 'd08',  
              'a', 'also', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'do',
              'for', 'have', 'is', 'in', 'it', 'of', 'or', 'see', 'so',
              'that', 'the', 'this', 'to', 'we' ]

search = SearchEngine(stopwords = stopWords, seed_urls = seedURLs)

exit_string = "/quit"

print "Enter " + exit_string + " to end the program."
print

query = ""
while 1 == 1:
    query = raw_input("Search query: ");
    
    if query == exit_string:
        break;
    
    search.query(query);
    print
Beispiel #24
0
        file = open('tmp/history_request.txt', 'a')
        file.write('\n QUERY \n' + str(query_text) + ' \n RESPONSE \n' + str(search_results) + '\n_______\n\n\n')
        file.close()

    search_results['lda'] = []

    return Response(json.dumps(search_results, ensure_ascii=False), status=200, mimetype='application/json')


def check_db_status():
    """Seed the database when ``DBController.get_all_articles()`` yields no article."""
    # Only emptiness matters, so stop at the first article instead of
    # walking the whole collection to count it.
    has_articles = any(True for _ in DBController.get_all_articles())
    if not has_articles:
        print('Seeding database...')
        DatabaseSeeder.seed()


if __name__ == "__main__":
    # NOTE(review): presumably read by code outside this view - confirm before removing.
    log_of_result = []

    # Seed the database first when it holds no articles.
    check_db_status()

    # Only the 'lsi' model is loaded; the lda and d2v loads are disabled.
    lsi = Storage.load_model('out/lsi', 'phyge', 'lsi')
    # lda = Storage.load_model('out/lda', 'phyge', 'lda')
    # d2v = Storage.load_model('out/d2v', 'phyge', 'd2v')

    search_engine = SearchEngine(models=[lsi])

    # Listen on all network interfaces, port 5050.
    app.run(host='0.0.0.0', port=5050)
from SearchEngine import SearchEngine
from Query import Query
import pickle

# ai_index_documents_length = eval(open('../indices/AIindex/documents_length.txt').read())
# AIindex = SearchEngine([open('../indices/AIindex/BLOCK262'), open('../indices/AIindex/BLOCK263'), open('../indices/AIindex/BLOCK264'), open('../indices/AIindex/BLOCK265')], ai_index_documents_length)
# document_frequencies = AIindex.document_frequencies()

# with open('../indices/AIindex/document_frequencies.txt', 'w') as file:
#     file.write(str(document_frequencies))

def _read_literal(path):
    """Return the Python object written as text in *path*; the file is closed afterwards."""
    with open(path) as source:
        # SECURITY: eval() executes whatever the file contains, so this must
        # only be used on trusted index files.
        # NOTE(review): if these files hold plain literals, ast.literal_eval
        # would be the safer choice - confirm the file format first.
        return eval(source.read())


document_frequencies = _read_literal('../indices/AIindex/document_frequencies.txt')

concordia_ai_documents_length = _read_literal('../indices/ConcordiaAI/documents_length.txt')
# The block files are handed to the engine as open file objects and are left
# open here.  NOTE(review): presumably the engine reads them later - confirm.
ConcordiaAI = SearchEngine(
    [open('../indices/ConcordiaAI/BLOCK%d' % block_number)
     for block_number in range(119, 124)],
    concordia_ai_documents_length)

while True:
    try:
        k = int(input('Enter number of returns:'))
        query = Query(input('Enter query:'))
    except Exception as ex:
        # Show the first argument of the error and ask again.
        print(ex.args[0])
        continue
    results = ConcordiaAI.search(query, document_frequencies, k)
    print('-----BM25 Ranking-----\n')
    print(results[0])
    print('-----BM25 Ranking with AITopics Document Frequencies-----\n')
    print(results[2])
    print('\n-----Tf-idf Ranking-----\n')
    print(results[1])
    print('\n-----Tf-idf Ranking with AITopics Document Frequencies-----\n')
Beispiel #26
0
#main.py
#Gaonkar, Vijay
#vrgaonkar

from __future__ import print_function
from SearchEngine import SearchEngine

search_engine = SearchEngine()
results = []
choice = 0

# Keep prompting until option 5 (Quit) is chosen.
while choice != 5:
    print("\n\t\t\t ###############################  GAVELTON LIBRARY ############################### \n"
          "\n Welcome! Looking for something? I can help!")

    search_str = raw_input("\n Enter a word or a phrase to get started: ")
    print("\n How do you wanna search?\n"
          " 1. Search by call number\n"
          " 2. Search by title\n"
          " 3. Search by subject\n"
          " 4. Search by other\n"
          " 5. Quit\n")
    # NOTE(review): this script uses raw_input, i.e. Python 2, where input()
    # evaluates the typed text as an expression.  The integer comparisons
    # below rely on that; confirm whether int(raw_input(...)) is acceptable.
    choice = input(" Your Choice: ")

    if choice == 1:
        results = search_engine.search_by_call_no(search_str)
        if len(results) > 0:
            print("\n\t\t\t ************************** Search Results ************************** ")
            for item in results:
                # Indented with spaces like the rest of the script (was tabs).
                item.display()
Beispiel #27
0
 def __init__(self, scrambledMatrix, goalMatrix):
     # Delegate construction to SearchEngine.__init__, passing both matrices through unchanged.
     SearchEngine.__init__(self, scrambledMatrix, goalMatrix)
Beispiel #28
0
from SearchEngine import SearchEngine
# Create an engine and run one lookup for 'hydrogen' with mode='instance'.
# The return value of find_matches_from_wiki is not used here.
search_engine = SearchEngine()
search_engine.find_matches_from_wiki('hydrogen', mode='instance')

Beispiel #29
0
        output = {}

        for sub_intersetion in intersetion:
            sub = sub_intersetion.encode("utf-8")
            output[sub] = odict[sub]

        if output:
            rank = sort_by_value(output)
            html = render_template("search.html",
                                   ol=output,
                                   rank=rank,
                                   key=keywords)
        else:
            html = render_template("search.html", ol=None, key=False)
    #return jsonify({'result': rtn})
    return html


def sort_by_value(d):
    """Return the keys of *d* ordered by their values, largest value first.

    Keys with equal values are ordered by key, largest key first.
    """
    ranked = sorted(((value, key) for key, value in d.items()), reverse=True)
    return [key for _value, key in ranked]


if __name__ == '__main__':
    # Build the engine's tries before the application starts serving.
    se = SearchEngine()
    se.buildTries()
    # NOTE(review): debug=True looks like a development setting - confirm it
    # is not used for a public deployment.
    app.run(debug=True, port=8886)
Beispiel #30
0
    print("\nAverages:")

    print("\tP@10: {:.5f}".format(avgPAtTen))
    print("\tinterpolated MAP: {:.5f}".format(avgMAP))
    print("\ttime: {:.5f} s".format(avgTime))

    print("\tinterpolated recall points (precision, recall):")
    for pair in avgRecallPoints:
        p, r = pair
        print("\t({:.5f}, {:.5f}),".format(p, r))


if __name__ == '__main__':
    parser = createParser()
    args = parser.parse_args()
    eng = SearchEngine()

    # Dispatch on the function selected on the command line.
    if args.function == CREATE_INDEX_CMD:
        collectionFolder = args.path
        start = getTime()
        menuCreateIndex(eng, collectionFolder)
        # Report the difference between the two getTime() readings.
        print("It took {} s to create and save the index.".format(getTime() -
                                                                  start))

    elif args.function == INTERACTIVE_QUERY_CMD:
        rankingSize = args.rSize
        menuInteractiveQuery(eng, rankingSize)

    elif args.function == PROCESS_QUERY_FILE_CMD:
        # NOTE(review): only the arguments are read in the visible part of
        # this branch; nothing is done with them yet.
        queryFile = args.path
        rankingSize = args.rSize
def main():
    """Menu loop: run the selected search, display the results and print their count; option 5 ends it."""
    se = SearchEngine()

    # Menu option -> (prompt text, SearchEngine method name).
    actions = {
        1: ("Enter Call Number: ", "search_by_call_no"),
        2: ("Enter Subject: ", "search_by_subjects"),
        3: ("Enter Title: ", "search_by_title"),
        4: ("Enter Other : ", "search_by_other"),
    }

    option = int(displayMenu())
    while option != 5:
        results = list()
        if option in actions:
            prompt, method_name = actions[option]
            term = input(prompt)
            results = getattr(se, method_name)(term)
            se.display(results)
        else:
            print('\n')
            print("Enter Correct option:")
        print('\n')
        print("Total No. of Matching Result: ", len(results))
        print('\n')
        option = int(displayMenu())
Beispiel #32
0
def metaAuthors(request):
    """Return, as a JSON response, whatever ``SearchEngine.getAuthors`` yields for the ``q`` GET parameter."""
    se = SearchEngine()
    query = request.GET.get('q', '')
    payload = json.dumps(se.getAuthors(query))
    return HttpResponse(payload, content_type="application/json")
Beispiel #33
0
                        action="store_true",
                        help="Also use explanations of idioms")
    parser.add_argument('--sound_like',
                        type=str,
                        help="Specify 'sounds-like' word")
    parser.add_argument('--means', type=str, help="Specify 'meaning' word")
    parser.add_argument('--verbose',
                        action="store_true",
                        help="Print additional debugging info")
    parser.add_argument('--func',
                        action="store_true",
                        help="Consider function words for replacement.")
    cmd_args = parser.parse_args()

    print("Hello and welcome to the pun aid!")
    se = SearchEngine(1000, cmd_args.vecs, cmd_args.combo)
    model = se.word_vectors
    print(cmd_args.ortho)

    if cmd_args.ortho and cmd_args.rhyme:
        print(
            'Looking for both orthographic matches and rhyming matches is not possible. Please choose one!'
        )
        sys.exit()

    while True:

        query = input(
            "Start search: \"Sounds like x\" \"Has to do with y\":\n> ")
        query = query.split()
        sounds_like = query[0]
Beispiel #34
0
                        wrong_ids[model_name].append(
                            (true_sourse, found_sourse))
                output_answer.append(
                    dict(true_sourse=true_sourse,
                         sourse=found_sourse,
                         model=model_name,
                         title=answer[model_name][0]['title'],
                         similarity=answer[model_name][0]['similarity']))
        file.write(json.dumps(output_answer, indent=2, ensure_ascii=False))
        file.write(json.dumps(wrong_ids, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    # NOTE(review): presumably read by code outside this view - confirm before removing.
    log_of_result = []

    # Seed the database when it currently holds no documents.
    if len(DBController.get_all_documents()) == 0:
        print('Seeding database...')
        DatabaseSeeder.seed()

    # Load the four stored models and hand them to the search engine.
    lsi = Storage.load_model('out/lsi', 'phyge', 'lsi')

    lda = Storage.load_model('out/lda', 'phyge', 'lda')
    d2v = Storage.load_model('out/d2v', 'phyge', 'd2v')
    fast_text = Storage.load_model('out/fast_text', 'phyge', 'ft')
    search_engine = SearchEngine(models=[fast_text, d2v, lda, lsi])
    # The query and answer files live in the 'test_<queriesId>' folder of the tests directory.
    test_path = os.path.join(PhyVariables.testsDir,
                             'test_' + str(PhyVariables.queriesId))
    run_search(os.path.join(test_path, PhyVariables.queriesFileName),
               os.path.join(test_path, PhyVariables.answersFileName), 1)
    # run_search('Resources/pdf_articles.json','Resources/answers.json',1)
Beispiel #35
0
class ManipulatePyExercise:
    """Inspect and manipulate a "true or false question" exercise folder.

    Existence checks and directory navigation are delegated to
    ``self.searchEngine``. Files are opened by bare (relative) name, so the
    methods presumably rely on the search engine having moved the process
    into the exercise directory first (TODO confirm against SearchEngine).
    """

    def __init__(self):
        self.theFolderName = 'true_or_false_question'
        # NOTE: foldersNames is intentionally left disabled, which means
        # isFolderExercise() cannot be called as-is (see its docstring).
        #self.foldersNames = ['qom_questions_transformer','true_or_false_question']
        # Files that must all be present for a folder to count as an exercise.
        self.filesNames = [
            'true_or_false_question.tex', 'program.py',
            'util_make_random_versions.py'
        ]
        self.questionpdf = 'true_or_false_question.pdf'
        self.questionpng = 'true_or_false_question.png'
        # [prefix, suffix of the "true" file, suffix of the "false" file]
        self.linesNamepdf = ['answer_', '_true.pdf', '_false.pdf']
        self.linesNamepng = ['answer_', '_true.png', '_false.png']
        self.questionFolder = 'question'
        self.versionFolder = 'version_'
        self.directory = ''
        self.filePath = ''
        self.searchEngine = SearchEngine()

        # Names used by the difference finder.
        self.pyFilesNames = ['full_program.py', 'answers_program.py']
        self.pyProgram = 'program.py'
        # [stem, extension] used to build program2.py, program3.py, ...
        self.pyPrograms = ['program', '.py']
        self.texFilesNames = ['true_or_false_question.tex']
        # [prefix, "false" suffix, "true" suffix] -- note the order differs
        # from linesNamepdf / linesNamepng.
        self.texAnswerNames = ['answer_', '_false.tex', '_true.tex']

        self.folders = []
        self.files = []

    def seeIfExercise(self, path):
        """Return True if *path* contains every file an exercise requires."""
        self.searchEngine.setDirectory(path)  # point the engine at the folder
        files = self.searchEngine.getAllFiles()
        return self.isFilesExercise(files)

    def isFolderExercise(self, folders):  # not in use
        """Return True if every name in ``self.foldersNames`` prefixes a folder.

        When the matched prefix is ``self.theFolderName`` the folder is
        remembered in ``self.filePath``.

        NOTE: ``self.foldersNames`` is commented out in ``__init__``, so
        calling this method raises AttributeError unless the attribute is set
        by the caller first.
        """
        counter = 0
        for folder in folders:
            for name in self.foldersNames:
                if folder[:len(name)] == name:
                    counter += 1
                    if name == self.theFolderName:
                        # Keep the exercise folder for later use.
                        self.filePath = folder
        # Exercise folder only when all expected folder names were matched.
        return counter == len(self.foldersNames)

    def isFilesExercise(self, files):
        """Return True if *files* contains every name in ``self.filesNames``.

        A subset check is used instead of counting matches so that duplicate
        entries in *files* cannot make the result wrong.
        """
        return set(self.filesNames).issubset(files)

    def getExerciseImg(self):
        """Return the bytes of the question PNG, creating it from the PDF if needed.

        Returns None (after printing a message) when the question PDF does not
        exist.
        """
        if not self.searchEngine.fileExists(self.questionpdf):
            print('Não foi possivél encontrar o pdf do enunciado')
            return None
        if not self.searchEngine.fileExists(self.questionpng):
            # Every page is saved to the same file name, so for a multi-page
            # PDF only the last page is kept.
            for page in convert_from_path(self.questionpdf, 500):
                page.save(self.questionpng, 'PNG')
        # Context manager so the file handle is closed deterministically.
        with open(self.questionpng, 'rb') as png_file:
            return png_file.read()

    def getExerciseLinesImg(self):
        """Return the PNG bytes of every answer line, as [true_1, false_1, true_2, ...].

        Answer lines are numbered from 1; iteration stops at the first number
        for which the "true" or the "false" PDF is missing. Returns None
        (after printing a message) when not even line 1 exists.
        """
        lines = []
        i = 0
        while True:
            i += 1
            name_true = self.linesNamepdf[0] + str(i) + self.linesNamepdf[1]
            name_false = self.linesNamepdf[0] + str(i) + self.linesNamepdf[2]
            if not (self.searchEngine.fileExists(name_true)
                    and self.searchEngine.fileExists(name_false)):
                break

            png_true = self.linesNamepng[0] + str(i) + self.linesNamepng[1]
            png_false = self.linesNamepng[0] + str(i) + self.linesNamepng[2]
            # Only the "true" PNG is checked; both PNGs are (re)generated
            # together when it is missing.
            if not self.searchEngine.fileExists(png_true):
                for page in convert_from_path(name_true, 500):
                    page.save(png_true, 'PNG')
                for page in convert_from_path(name_false, 500):
                    page.save(png_false, 'PNG')

            # Read both images, closing each handle as soon as it is consumed.
            with open(png_true, 'rb') as file_true:
                lines.append(file_true.read())
            with open(png_false, 'rb') as file_false:
                lines.append(file_false.read())

        if i == 1:
            print('Não foi possivél encontrar os pdfs das alineas')
            return None
        return lines

    def getExerciseName(self):
        """Return whatever the search engine reports as the last folder."""
        return self.searchEngine.getLastFolder()

    def getPyFilesNames(self):
        """Return the names of the Python files of the exercise that exist.

        Order: ``program.py``, then ``program2.py``, ``program3.py``, ... up to
        the first missing number, then the entries of ``self.pyFilesNames``.
        """
        files = []
        if self.searchEngine.fileExists(self.pyProgram):
            files.append(self.pyProgram)
        i = 2
        while self.searchEngine.fileExists(self.pyPrograms[0] + str(i) +
                                           self.pyPrograms[1]):
            files.append(self.pyPrograms[0] + str(i) + self.pyPrograms[1])
            i += 1
        files.extend(name for name in self.pyFilesNames
                     if self.searchEngine.fileExists(name))
        return files

    def getTexFilesNames(self):
        """Return the names of the TeX files of the exercise.

        The question file(s) come first, followed by the "false" and "true"
        answer file of each numbered answer. Only the "false" file is checked
        for existence; its "true" sibling is added unconditionally.
        """
        files = [name for name in self.texFilesNames
                 if self.searchEngine.fileExists(name)]
        i = 1
        while self.searchEngine.fileExists(self.texAnswerNames[0] + str(i) +
                                           self.texAnswerNames[1]):
            files.append(self.texAnswerNames[0] + str(i) +
                         self.texAnswerNames[1])
            files.append(self.texAnswerNames[0] + str(i) +
                         self.texAnswerNames[2])
            i += 1
        return files

    def seeIfHasVersions(self):
        """Return True if ``question/version_1`` exists.

        The search engine is always moved back out of the question folder,
        including when no version folder is found (previously it was left
        inside the question folder in that case).
        """
        if not self.searchEngine.folderExists(self.questionFolder):
            return False
        self.searchEngine.goFoward(self.questionFolder)
        try:
            return bool(
                self.searchEngine.folderExists(self.versionFolder + '1'))
        finally:
            self.searchEngine.goBack()

    def getVersionsNames(self):
        """Return ['version_1', 'version_2', ...] for the consecutive version folders."""
        i = 1
        names = []
        self.searchEngine.goFoward(self.questionFolder)
        while self.searchEngine.folderExists(self.versionFolder + str(i)):
            names.append(self.versionFolder + str(i))
            i += 1
        self.searchEngine.goBack()
        return names

    def getVersionImgs(self, version_number):
        """Return ``[question_image, answer_line_images]`` for one version.

        *version_number* may be a string or an int; it is appended to
        ``self.versionFolder`` to build the folder name.
        """
        imgs = []
        self.searchEngine.goFoward(self.questionFolder)
        self.searchEngine.goFoward(self.versionFolder + str(version_number))
        imgs.append(self.getExerciseImg())  # question statement
        imgs.append(self.getExerciseLinesImg())  # answer lines
        self.searchEngine.goBack()
        self.searchEngine.goBack()
        return imgs

    def getPyFileDifferences(self, fileName):
        """Return the result of ``pythonHTML`` for *fileName*."""
        return pythonHTML(fileName)

    def getTexFileDifference(self, fileName):
        """Return the result of ``questionHTML`` for *fileName*."""
        return questionHTML(fileName)

    def changeNumberOfVersions(self, number):
        """Rewrite the ``number_of_versions = ...`` line(s) of make_random_versions.py.

        The file is opened by relative name, i.e. in the current working
        directory. Every line containing ``"number_of_versions = "`` is
        replaced; all other lines are written back unchanged.
        """
        with open("make_random_versions.py", "r") as f:
            lines = f.readlines()
        with open("make_random_versions.py", "w") as f:
            for line in lines:
                if "number_of_versions = " in line:
                    f.write('number_of_versions = ' + str(number) + '\n')
                else:
                    f.write(line)

    def generateVersions(self):
        """Run the version-generation scripts, reporting (not raising) failures."""
        try:
            self.searchEngine.runFile('make_random_versions.py')
            self.searchEngine.runFile('make_pdfs.py')
        except Exception:
            # Best-effort by design, but do not swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            print('Ocorreu uma excepção na geração de versões')
Beispiel #36
0
class Database:
    """Pickle-backed store holding a SearchEngine and per-document entities."""

    def __init__(self, file = ".database.pkl"):
        """
        Initializes a new Database

        Loads the engine and entities from `file` when possible; otherwise
        starts with a fresh SearchEngine and no entities.

        Parameters
        ----------
        file: String
            path to database file (.pkl)

        Returns
        -------
        None
        """
        self.file = file #database file
        try:
            self.engine, self.entities = self.getDatabase()
            print("Loaded from database", flush = True)
        except (FileNotFoundError, EOFError):
            # FileNotFoundError: first run, the file has not been written yet.
            # EOFError: the file exists but is empty.
            self.engine = SearchEngine()
            self.entities = {}
        
    def __repr__(self):
        return "{}".format(self.engine.raw_text.keys())

    def addDocument(self, text_id, text):
        """
        Adds a document to the database and persists the result

        Parameters
        ----------
        text_id: id of document to add
        text: rawtext

        Returns
        -------
        None
        """
        #TODO: Finish
        self.engine.add(text_id, text)
        entities_list = self.engine.getEntities(text)
        self.entities[text_id]= entities_list
        # Indexing a not-yet-seen entity must create its set, so this assumes
        # inverted_index behaves like a defaultdict(set) -- TODO confirm.
        for entity in entities_list:
            self.engine.inverted_index[entity].add(text_id)
        self.updateDatabase()

    def updateDatabase(self):
        """
        Writes self.engine and self.entities to database file

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        with open(self.file, "wb") as f:
            pickle.dump((self.engine, self.entities), f)

    def getDatabase(self):
        """
        Retrieves engine and entities from saved database file

        Parameters
        ----------
        None

        Returns
        -------
        Tuple of (engine, entities) exactly as they were pickled by
        updateDatabase

        Raises
        ------
        FileNotFoundError
            if the database file does not exist
        EOFError
            if the database file is empty
        """
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only point `file` at a database this program wrote itself.
        with open(self.file, "rb") as f:
            engine, entities = pickle.load(f)
        return (engine, entities)

    def removeDocument(self, text_id):
        """
        Removes a document from the database and persists the result

        Parameters
        ----------
        text_id: string
            text_id of document to remove

        Returns
        -------
        None

        Raises
        ------
        KeyError
            if text_id has no entry in self.entities
        """
        self.engine.remove(text_id)
        # Drop the document from the posting set of each of its entities.
        for entity in self.entities[text_id]:
            if text_id in self.engine.inverted_index[entity]:
                self.engine.inverted_index[entity].remove(text_id)

        del self.entities[text_id]
        self.updateDatabase()

    def clear(self):
        """
        Clears the database and overwrites the database file with the empty state

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        self.engine = SearchEngine()
        self.entities = {}
        self.updateDatabase()
Beispiel #37
0
from flask import Flask, request
from flask_cors import CORS, cross_origin
from SearchEngine import SearchEngine
from QueryResponse import QueryResponse, from_query_result
import os

# --------------------------------|
# In order to execute run:        |
#     `python server.py`          |
# --------------------------------|

# Module-level singletons: one search engine and one Flask application.
engine = SearchEngine()
# static_url_path="" serves the files in resources/web/static from the URL root.
app = Flask(__name__, static_url_path="", static_folder="resources/web/static")
app.config["CORS_HEADERS"] = "Content-Type"

# Enable flask_cors on the application.
CORS(app)


@app.route("/")
def root():
    """Answer requests for ``/`` with the static ``index.html`` file."""
    index_page = "index.html"
    return app.send_static_file(index_page)


@app.route("/query", methods=["GET"])
def query():
    query_text = request.args["query"]
    svd_param = request.args.get("svd")
    k_largest = request.args.get("k")

    k = 20
    if k_largest:
Beispiel #38
0
 def __init__(self):
     """Create the SearchEngine instance this object works with."""
     self.search_engine = SearchEngine()
Beispiel #39
0
 def setUp(self):
     """Build the SearchEngine under test and store it as ``self.se``."""
     # NOTE(review): the positional arguments look like (size limit, word
     # vector file, combination mode) -- confirm against SearchEngine.__init__.
     self.se = SearchEngine(1000,
                            'word2vec.glove.6B.300d.txt',
                            'sum',
                            forcebin=True)
Beispiel #40
0
#!/usr/bin/python

from __future__ import print_function
from SearchEngine import SearchEngine
import sys
# Menu selection read by the interactive loop below; 0 means "nothing chosen yet".
choice = 0
se = SearchEngine()

# Parse the catalogues requested on the command line. The former guard
# `len(sys.argv) is not None` was always true (len() returns an int), so it
# has been dropped; behavior is unchanged.
# NOTE: the keywords are matched as substrings of the repr of the whole
# argument list, which includes the script path (sys.argv[0]).
argument = str(sys.argv)
if "book" in argument:
	se.parse_book()
if "film" in argument:
	se.parse_film()
if "period" in argument:
	se.parse_periodic()
if "video" in argument:
	se.parse_video()


while choice!= 5:
	print("__________________Welcome_________________")
	print("1.Search by Call Number")
	print("2.Search by Title")
	print("3.Search by Subject")
	print("4.Search by Others")
	print("5.Exit")
	print("__________________________________________")
	choice = input("Please enter your choice:")

	if choice == 1: