def __init__(self):
    """Record the naming conventions of an exercise folder and create its helper.

    Only attributes are assigned here; the single object constructed is the
    ``SearchEngine`` stored in ``self.searchEngine``.
    """
    base = 'true_or_false_question'
    answer = 'answer_'

    # Exercise folder name and the files it is expected to contain.
    self.theFolderName = base
    # NOTE: ``self.foldersNames`` is deliberately not defined (it was
    # commented out as ['qom_questions_transformer','true_or_false_question']).
    self.filesNames = [
        base + '.tex',
        'program.py',
        'util_make_random_versions.py',
    ]

    # Rendered statement and per-answer file name parts (prefix, suffixes).
    self.questionpdf = base + '.pdf'
    self.questionpng = base + '.png'
    self.linesNamepdf = [answer, '_true.pdf', '_false.pdf']
    self.linesNamepng = [answer, '_true.png', '_false.png']

    # Sub-folder names used for generated versions.
    self.questionFolder = 'question'
    self.versionFolder = 'version_'

    self.directory = ''
    self.filePath = ''
    self.searchEngine = SearchEngine()

    # Names used when looking for Python and TeX sources.
    self.pyFilesNames = ['full_program.py', 'answers_program.py']
    self.pyProgram = 'program.py'
    self.pyPrograms = ['program', '.py']
    self.texFilesNames = [base + '.tex']
    self.texAnswerNames = [answer, '_false.tex', '_true.tex']

    self.folders = []
    self.files = []
class TestSearchEngine(TestCase):
    """Pin the rankings returned by ``SearchEngine.execute_query``.

    Each test sets ``combine`` on the shared engine and compares the result
    of two fixed queries against hard-coded (word, score) lists.
    """

    def setUp(self):
        # A new engine is built before every test method.
        self.se = SearchEngine(
            1000, 'word2vec.glove.6B.300d.txt', 'sum', forcebin=True)

    def test_execute_query_Summation(self):
        """Expected rankings when ``combine`` is 'sum'."""
        self.se.combine = 'sum'

        lose_alcohol = self.se.execute_query('lose', 'alcohol', False, False)
        self.assertEqual(lose_alcohol, [
            ('booze', 23),
            ('laws', 228),
            ('lows', 578),
            ('dues', 621),
            ("law's", 766),
        ])

        summarize_warrior = self.se.execute_query(
            'summarize', 'warrior', False, False)
        self.assertEqual(summarize_warrior, [
            ("samurai's", 1),
            ('samurais', 1),
            ('warriors', 100000000),
            ('somers', 100000002),
            ('dragon', 100000002),
        ])

    def test_execute_query_Multiplication(self):
        """Expected rankings when ``combine`` is 'prod'."""
        self.se.combine = 'prod'

        lose_alcohol = self.se.execute_query('lose', 'alcohol', False, False)
        self.assertEqual(lose_alcohol, [
            ('booze', 114),
            ('laws', 6501),
            ('dues', 9120),
            ("law's", 23552),
            ('lows', 38304),
        ])

        summarize_warrior = self.se.execute_query(
            'summarize', 'warrior', False, False)
        self.assertEqual(summarize_warrior, [
            ('samurais', 1),
            ("samurai's", 2),
            ('warriors', 100000000),
            ('somers', 100000002),
            ('dragon', 100000002),
        ])

    def tearDown(self):
        # Nothing to release.
        pass
def test_Search_3(self):
    """Compare search results for the queries ["ADT"] and ["Computer Science"]
    against fixed (path, score) lists."""
    stop_words = import_stopwords("stop_words.txt", HashTableSepchain())
    engine = SearchEngine("docs", stop_words)

    adt_query = engine.parse_words(["ADT"])
    cs_query = engine.parse_words(["Computer Science"])

    expected_adt = [('docs/data_structure.txt', 0.017277012046530055)]
    expected_cs = [
        ('docs/test.txt', 1.0),
        ('docs/information_retrieval.txt', 0.017241379310344827),
        ('docs/hash_table.txt', 0.009523809523809525),
    ]

    self.assertEqual(engine.search(adt_query), expected_adt)
    self.assertEqual(engine.search(cs_query), expected_cs)
def do_load(self, inp):
    """ load {FILE_PATH} """
    # NOTE(review): the docstring is kept verbatim; if the host class is a
    # cmd.Cmd it is shown to the user as help text -- confirm before editing.
    try:
        print("loading {}...".format(inp))
        # Build a new engine from the given path and keep it on the instance.
        self.engine = SearchEngine(inp)
        print("loading succeed\n")
    except Exception as err:
        # Report the failure instead of propagating it to the caller.
        print("Loading failed with ", err)
def meta(request):
    """Render 'search/meta.html' for the ``q`` GET parameter.

    ``metaSearch`` is only called when the query is longer than one
    character; otherwise the page is rendered with an empty result list.
    """
    engine = SearchEngine()
    query = request.GET.get('q', '')
    result = engine.metaSearch(query) if len(query) > 1 else []
    context = {
        'query': query,
        'result': result,
        'len': len(result),
    }
    return render(request, 'search/meta.html', context)
def keywords(request):
    """Render 'search/keywords.html' for the ``q`` GET parameter.

    ``tfidfSearch`` is only called when the query is longer than one
    character; otherwise the page is rendered with an empty result list.
    """
    engine = SearchEngine()
    query = request.GET.get('q', '')
    result = engine.tfidfSearch(query) if len(query) > 1 else []
    context = {
        'query': query,
        'result': result,
        'len': len(result),
    }
    return render(request, 'search/keywords.html', context)
def processCommand():
    """Interactive search menu: ask for a search type, run it, show results.

    The menu repeats for as long as the user answers exactly "Yes" to the
    continue prompt; any other answer ends the function.

    Changes from the previous version:
    * repetition uses a loop instead of recursion, so a long session can no
      longer hit the interpreter's recursion limit;
    * the numbered menu entries ("1".."4") are accepted as well as the
      option names that were already recognised;
    * the bare ``exit`` expression, which had no effect, was removed.
    """
    # Option name -> (prompt shown to the user, SearchEngine method to call).
    options = {
        "Title": ("Enter the title to be searched", "search_by_title"),
        "Number": ("Enter the number to be searched ", "search_by_call_number"),
        "Subjects": ("Enter the subject to be searched", "search_by_subjects"),
        "Other": ("Enter the data to be searched ", "search_by_other"),
    }
    # Menu number -> option name, matching the printed menu below.
    numbered = {"1": "Title", "2": "Number", "3": "Subjects", "4": "Other"}

    while True:
        print("Enter the option you want to search by")
        print("1.Title")
        print("2.Number")
        print("3.Subjects")
        print("4.Other")
        inp = input()
        selected = numbered.get(inp, inp)

        if selected in options:
            prompt, method_name = options[selected]
            term = input(prompt)
            # A fresh engine is created for every search, as before.
            se = SearchEngine()
            search_result = getattr(se, method_name)(term)
            display_main(search_result)
        # An unrecognised option falls through to the continue prompt.

        yesno = input("Do you wish to continue? Yes or No")
        if yesno != "Yes":
            return
def __init__(self, parent=None):
    """Build the dialog UI, wire its signals to the slots and create the engine."""
    QtGui.QDialog.__init__(self, parent)
    self.setupUi(self)

    # (signal, slot) pairs, connected in this order.
    wiring = (
        (self.pushButtonLoad.clicked, self.slotLoadIndex),
        (self.pushButtonGenerate.clicked, self.slotGenerateIndex),
        (self.lineEditQuery.textChanged, self.slotCheckQueryText),
        (self.pushButtonSearch.clicked, self.slotProcessQuery),
        (self.treeWidgetResults.itemDoubleClicked, self.slotShowDocument),
    )
    for signal, slot in wiring:
        signal.connect(slot)

    # Search engine instance used by the slots.
    self.engine = SearchEngine(client=True)
def clear(self):
    """
    Replaces the engine and the entities with empty ones, then calls
    updateDatabase()

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    self.engine, self.entities = SearchEngine(), {}
    self.updateDatabase()
class QuerySystem:
    """Run text queries through a ``SearchEngine`` and store or print the hits."""

    # Class-level default; every instance replaces it in __init__.
    search_engine = None

    def __init__(self):
        self.search_engine = SearchEngine()

    def get_queries_from_file(self, filename='query.txt'):
        """Return the lines of ``filename`` (newlines included) as a list.

        Fix: the ``filename`` argument is now honoured. Previously the
        method always opened 'query.txt' regardless of what was passed,
        and carried an unreachable ``return None``.
        """
        with open(filename, 'r') as query_file:
            return query_file.readlines()

    def search_query(self, query):
        """Search for the lower-cased ``query``.

        Returns a list of ``(document, score)`` tuples with the score
        converted to ``str``.
        """
        return [
            (document, str(score))
            for (document, score) in self.search_engine.search(query.lower())
        ]

    def store_result(self, query, result):
        """Write one document name per line to 'output/<query>.txt'.

        ``result`` is an iterable of ``(doc, score)`` pairs; scores are not
        written.
        NOTE(review): ``query`` is used directly as a file name -- confirm
        callers never pass path separators.
        """
        with open('output/' + query + '.txt', 'w') as output_file:
            for (doc, _score) in result:
                output_file.write(doc + '\n')

    def print_result(self, result):
        """Print every ``(document, score)`` pair as 'document : score'."""
        for (document, score) in result:
            print(document + " : " + str(score))
def clip_poly(jsonpath, task_id, search_time, res_path):
    """Collect data paths per time key for the configured area and clip them.

    The area is read from ``conf.jsonpath`` after ``set_conf`` has been
    called with the arguments. Paths returned by the search are grouped by
    time key, and each group is passed to ``clip_dataset_list_groupby_time``.
    A summary line with the elapsed time is printed at the end.
    """
    start = time()
    set_conf(jsonpath, task_id, search_time, res_path)
    # os.mkdir(conf.sRslPath)
    corner_lt, corner_rb = BaseProcesses.read_json_area(conf.jsonpath)
    unitblock_list = get_unitblock_list(corner_lt, corner_rb)

    # time key -> list of path names found for that key
    paths_by_time = {}
    for unitblock in unitblock_list:
        grid_codes = GridCalculate.GridCodeToGridlist_iPCSType(
            unitblock['sGridCodeLT'],
            unitblock['sGridCodeRB'],
            unitblock['iPCSType'])
        for grid_code in grid_codes:
            found = SearchEngine.SearchByRgDttmDtpd(
                grid_code, conf.sDatahomePath, conf.search_time,
                conf.iDataProduct, conf.iCloulLevel)
            for lbd_time in found:
                names = [lbd.sPathName for lbd in found[lbd_time]]
                paths_by_time.setdefault(lbd_time, []).extend(names)

    for lbd_time in paths_by_time:
        clip_dataset_list_groupby_time(paths_by_time[lbd_time], lbd_time)
    # Drop the reference to the grouped paths before reporting.
    paths_by_time = None

    end = time()
    print("任务{1}耗时{0},涉及{2}个条带".format(end-start, task_id, len(unitblock_list)))
def main():
    """Menu loop: pick a search type, print every hit, ask whether to go on.

    The loop ends when the user chooses "5" or answers anything other than
    "y" to the continue prompt.
    """
    se = SearchEngine()
    # Menu choice -> name of the SearchEngine method that serves it.
    searches = {
        "1": "search_by_call_number",
        "2": "search_by_title",
        "3": "search_by_subject",
        "4": "search_by_other",
    }
    results = []
    ch = "y"
    while ch == "y":
        print("Menu\n1.Search by call number\n2.Search by Title\n3.Search by subject\n4.Search by other\n5.exit\n")
        print("Enter search number :")
        choice = input()

        if choice == "5":
            print("Exited")
            break

        if choice in searches:
            string = input("Enter String :")
            results = getattr(se, searches[choice])(string)
            for item in results:
                item.display()
            # Empty the returned list in place once it has been shown.
            del results[:]
        else:
            print("Invalid choice ")

        ch = input("Do you want to continue ?(y/n)")
def main(argv): try: inputFilePath = argv[1] questionFilePath = argv[2] except: print "ERROR: Unable to read input argument!!" inputFilePath = 'data/a1.txt' questionFilePath = 'ques1.txt' # exit(1) article = Article(inputFilePath) # Get questions, queries, expected_types questions = readQuestions(questionFilePath) expected_types = at_detect(questionFilePath) # queries = [remove_stop_words_stem(question) for question in questions] assert len(expected_types) == len(questions) # assert len(expected_types) == len(queries) # Init classes se = SearchEngine(article) for i in range(len(questions)): if verbose: print '-' * 10 result = se.rankByIndri(questions[i]) topSentence = se.returnTopKResult(result, RETRIEVAL_LIMIT) finalAnswer = '' # Retrieve the top rankning answers for sentence in topSentence: if verbose: print 'expected_types: %s\n sentence:%s' % (expected_types[i], sentence) answer = get_answer(questions[i], expected_types[i], sentence) if answer != '/': finalAnswer = answer break if verbose: print '==finalAnswer== ', finalAnswer else: print finalAnswer
def result():
    """Run the submitted query and render 'result.html'.

    On POST the form field "query" is searched, the first 15 hits are
    passed through ``getResults`` and the elapsed time in seconds is
    printed.

    Fixes:
    * the elapsed time was printed as start minus end (always negative);
    * for any non-POST request ``res`` and ``query`` were never assigned,
      so the view could not produce a response. The template is now
      rendered with an empty query and an empty result list in that case.
    """
    query = ""
    res = []
    if request.method == 'POST':
        started = time.time()
        query = request.form["query"]
        res = SearchEngine(query, matrix)
        res = getResults(res[0:15])
        print(time.time() - started)
    return render_template("result.html", result=res, query=query)
def __init__(self, file = ".database.pkl"):
    """
    Initializes a new Database

    Loads the engine and entities through getDatabase(). When that fails
    because the file is empty (EOFError) or does not exist yet
    (FileNotFoundError), starts from an empty engine and entity dict.

    Fix: a missing database file used to raise FileNotFoundError, so a
    Database could not be created before the file existed.

    Parameters
    ----------
    file: String
        path to database file (.pkl)

    Returns
    -------
    None
    """
    self.file = file  # database file
    try:
        self.engine, self.entities = self.getDatabase()
        print("Loaded from database", flush = True)
    except (EOFError, FileNotFoundError):
        self.engine = SearchEngine()
        self.entities = {}
def ft_search(collection, query):
    """Full-text search within one collection.

    Returns a list of ``(document, preview)`` tuples, where ``document`` is
    the ``Document`` whose path matches the hit and ``preview`` is the
    highlighted "content" of the hit. When several documents share a path,
    the one with the lowest id is used.

    Fix: the searcher is now closed in a ``finally`` block. Before, any
    exception raised while resolving hits (for example a path with no
    matching Document) left it open.
    """
    se = SearchEngine(index_path=index_path, collection_id=collection.id)
    search_results = se.search(query, limit=100)
    res = search_results["result"]
    searcher = search_results["searcher"]

    results = []
    try:
        # Only the scored part of the result set is walked.
        for hit in res[0:res.scored_length()]:
            try:
                d = Document.objects.get(path=hit["path"])
            except Document.MultipleObjectsReturned:
                d = Document.objects.filter(
                    path=hit["path"]).order_by('id').first()
            preview = hit.highlights("content")
            results.append((d, preview))
    finally:
        searcher.close()
    return results
class SearchEngineCLI(Cmd):
    # Command-line front end for SearchEngine.
    # The intro text and the do_* docstrings are user-visible (cmd.Cmd shows
    # them as banner and help), so they are reproduced unchanged.
    prompt = 'DATA1050> '
    intro = """Welcome to your search engine! Type help for more information. load FILE_PATH switch TF-IDF | PageRank | smart search keywords... exit """

    def __init__(self):
        """ Initialize Search Engine CLI """
        super().__init__()
        # No engine until `load` succeeds; ranking mode starts as TF-IDF.
        self.engine = None
        self.mode = 'TF-IDF'

    def do_load(self, inp):
        """ load {FILE_PATH} """
        try:
            print("loading {}...".format(inp))
            self.engine = SearchEngine(inp)
            print("loading succeed\n")
        except Exception as err:
            # Any failure is reported and the CLI keeps running.
            print("Loading failed with ", err)

    def do_switch(self, inp):
        """ switch {TF-IDF | PageRank | smart} """
        if inp in ('TF-IDF', 'PageRank', 'smart'):
            self.mode = inp
            print(f'change mode to {inp}\n')
        else:
            print('mode must be one of {TF-IDF, PageRank, smart}\n')

    def do_search(self, inp):
        """ search {keywords...} """
        if self.engine is None:
            print("load a data file first\n")
            return

        results = self.engine.search(inp, self.mode)
        if len(results) == 0:
            print("No results returned\n")
            return

        # Render the hits as a two-column text table.
        table = pds.DataFrame(columns=['documents', 'score'], data=results)
        print(table.to_string())
        print("")

    def do_exit(self, inp):
        """ exit the program """
        print("Bye!\n")
        # A true return value stops the command loop.
        return True
def DealIn10km(sGridCode, sDateTime, iDataProduct, iModelId, iCloulLevel, sDatahomePath, sRslPath):
    '''Process a single 10 km grid cell, given its grid code, date and data type.

    1. Look up the list of data to process in the cell from the grid code,
       date and data type.
    2. Run the stitching step on that list.

    Returns the stitching result, or a fresh basic_data_struct whose
    sPathName is "0" when the lookup found nothing.
    '''
    found = SearchEngine.SearchByRgDttmDtpd(
        sGridCode, sDatahomePath, sDateTime, iDataProduct, iCloulLevel)

    if len(found) < 1:
        # Nothing to stitch: hand back a placeholder record.
        bdsRlt = basic_data_struct()
        bdsRlt.sPathName = "0"
        return bdsRlt

    return Stitching.StichingIn10km(found, iDataProduct, iModelId, sRslPath)
class SearchEngineGUI(QtGui.QDialog, Ui_SearchEngineMainWindow):
    """Dialog around a SearchEngine: load or generate the index, run
    queries and open result documents. Written for Python 2 (``unicode``)."""

    def __init__(self, parent=None):
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)

        # (signal, slot) pairs, connected in this order.
        wiring = (
            (self.pushButtonLoad.clicked, self.slotLoadIndex),
            (self.pushButtonGenerate.clicked, self.slotGenerateIndex),
            (self.lineEditQuery.textChanged, self.slotCheckQueryText),
            (self.pushButtonSearch.clicked, self.slotProcessQuery),
            (self.treeWidgetResults.itemDoubleClicked, self.slotShowDocument),
        )
        for signal, slot in wiring:
            signal.connect(slot)

        # Create search engine instance
        self.engine = SearchEngine(client=True)

    def slotShowDocument(self, item, column):
        """Open a DocumentWindow for the double-clicked result row."""
        docno = item.text(0)
        body = item.data(1, QtCore.Qt.UserRole+1).toString()
        terms = unicode(item.data(1, QtCore.Qt.UserRole+2).toString())
        documentWindow = DocumentWindow(self, docno, body, terms)
        documentWindow.show()

    def slotProcessQuery(self):
        """Run the typed query and fill the result tree, one row per document."""
        query = self.lineEditQuery.text()
        self.treeWidgetResults.clear()
        result_dict, terms = self.engine.search(unicode(query))
        for docno, docs in result_dict.items():
            record = QtGui.QTreeWidgetItem(self.treeWidgetResults)
            record.setText(0, docno)
            # Column 1 shows the first 250 characters as a preview.
            record.setText(1, "%s..." % docs[:250])
            # Full text and query terms are kept on the item for later use.
            record.setData(1, QtCore.Qt.UserRole+1, docs)
            record.setData(1, QtCore.Qt.UserRole+2, ",".join(terms))

    def slotCheckQueryText(self, text):
        """Enable the search button only with a loaded engine and non-empty text."""
        self.pushButtonSearch.setEnabled(self.engine.is_loaded and bool(text))

    def _enterBusy(self, title):
        # Disable the controls, show `title` and switch to the wait cursor.
        self.groupBox.setEnabled(False)
        QtGui.qApp.processEvents()
        self.setWindowTitle(title)
        QtGui.qApp.processEvents()
        QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))

    def _leaveBusy(self):
        # Restore cursor (called twice, as in the original code), title and controls.
        QtGui.QApplication.restoreOverrideCursor()
        QtGui.QApplication.restoreOverrideCursor()
        self.setWindowTitle("Search Engine")
        self.groupBox.setEnabled(True)

    def slotGenerateIndex(self):
        """Create the index while the dialog is shown as busy."""
        self._enterBusy("Generating indexes...")
        self.engine.create_index()
        self._leaveBusy()

    def slotLoadIndex(self):
        """Load the engine data and report document and term counts."""
        self._enterBusy("Loading document cache and inverted index...")
        # Load engine
        self.engine.load()
        # Fill UI with some info
        self.labelIndexInfo.setText("Loaded %d documents with %d terms." % (len(self.engine.documents), len(self.engine.index)))
        self._leaveBusy()
'''
Created on Mar 21, 2016

@author: robert
'''
from SearchEngine import SearchEngine

# Interactive start-up: configure the result count, then optionally load
# cached data.
# NOTE(review): the visible script has no branch for answering "n" to the
# cache question, so that loop only ends after a successful cache load --
# the rest of the script may lie outside this view.
if __name__ == '__main__':
    engine = SearchEngine()
    # Ask until the input is empty (nothing is set on the engine) or all digits.
    while (True):
        num_results = input("Please enter the number of entries you like to retrieve for each query: [10] ")
        if (num_results):
            if (num_results.isdigit()):
                engine.setNumResults(int(num_results))
                break;
            else:
                print("Invalid input")
        else:
            break;
    # Empty input or "y" (any case) means: load from a cache file.
    while (True):
        isLoadFromCache = input("Do you want to start search engine from cached data file? [Y/n] ")
        if (not isLoadFromCache or isLoadFromCache.lower() == "y"):
            try:
                cache_file_path = input("Please enter full path to the cached data file:")
                engine.initFromCache(cache_file_path)
                break
            except Exception as e:
                # Show the error and ask again.
                print(e)
if __name__ == "__main__":
    # Script entry point: open every dataset found for the configured area,
    # group the opened datasets by time key and clip each group.
    start = time()
    set_conf()
    os.mkdir(conf.sRslPath)
    corner_lt, corner_rb = BaseProcesses.read_json_area(conf.jsonpath)
    unitblock_list = get_unitblock_list(corner_lt, corner_rb)

    # time key -> list of opened datasets
    grid_dic = {}
    for unitblock in unitblock_list:
        lsGridcode = GridCalculate.GridCodeToGridlist_iPCSType(
            unitblock['sGridCodeLT'],
            unitblock['sGridCodeRB'],
            unitblock['iPCSType'])
        for sGridCode in lsGridcode:
            lbds10kmIn = SearchEngine.SearchByRgDttmDtpd(
                sGridCode, conf.sDatahomePath, conf.search_time,
                conf.iDataProduct, conf.iCloulLevel)
            for lbd_time in lbds10kmIn:
                opened = [
                    gdal.Open(lbd.sPathName) for lbd in lbds10kmIn[lbd_time]
                ]
                grid_dic.setdefault(lbd_time, []).extend(opened)

    for lbd_time in grid_dic:
        clip_dataset_list_groupby_time(grid_dic[lbd_time], lbd_time)
Authors: David Greisler (s0531301), Paul Kitt (s0528516), Marc Lehmann(s0524790)
"""
from SearchEngine import SearchEngine

# NOTE: Python 2 script (print statements, raw_input).

# URLs handed to the engine as ``seed_urls``.
seedURLs = [
    "http://mysql12.f4.htw-berlin.de/crawl/d01.html",
    "http://mysql12.f4.htw-berlin.de/crawl/d06.html",
    "http://mysql12.f4.htw-berlin.de/crawl/d08.html"
]

# Words handed to the engine as ``stopwords``: the page ids d01..d08 plus
# common English function words.
stopWords = [
    'd01', 'd02', 'd03', 'd04', 'd05', 'd06', 'd07', 'd08',
    'a', 'also', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'do',
    'for', 'have', 'is', 'in', 'it', 'of', 'or', 'see', 'so', 'that',
    'the', 'this', 'to', 'we'
]

search = SearchEngine(stopwords = stopWords, seed_urls = seedURLs)

# Typing this string at the prompt ends the loop below.
exit_string = "/quit"
print "Enter " + exit_string + " to end the program."
print

query = ""
# Prompt for queries until the exit string is entered.
while 1 == 1:
    query = raw_input("Search query: ");
    if query == exit_string:
        break;
    search.query(query);
    print
# NOTE(review): the next five statements are the tail of a function whose
# header lies outside this view. They append the query and its response to
# tmp/history_request.txt, set an empty 'lda' entry and return the results
# as a JSON response.
file = open('tmp/history_request.txt', 'a')
file.write('\n QUERY \n' + str(query_text) + ' \n RESPONSE \n' + str(search_results) + '\n_______\n\n\n')
file.close()
search_results['lda'] = []
return Response(json.dumps(search_results, ensure_ascii=False), status=200, mimetype='application/json')


def check_db_status():
    """Seed the database when ``DBController.get_all_articles()`` yields nothing."""
    # The articles are counted by iterating over them once.
    db_len = 0
    for _ in DBController.get_all_articles():
        db_len += 1
    if db_len == 0:
        print('Seeding database...')
        DatabaseSeeder.seed()


# Start-up: make sure the database has content, load the LSI model, build
# the search engine and start the app on all interfaces, port 5050.
if __name__ == "__main__":
    log_of_result = []
    check_db_status()
    lsi = Storage.load_model('out/lsi', 'phyge', 'lsi')
    # lda = Storage.load_model('out/lda', 'phyge', 'lda')
    # d2v = Storage.load_model('out/d2v', 'phyge', 'd2v')
    search_engine = SearchEngine(models=[lsi])
    app.run(host='0.0.0.0', port=5050)
from SearchEngine import SearchEngine
from Query import Query
import pickle

# Interactive ranking comparison over the ConcordiaAI index.
# NOTE(review): the data files below are parsed with eval(), which executes
# whatever they contain -- only safe for trusted files. The files opened
# here are never closed explicitly.

# ai_index_documents_length = eval(open('../indices/AIindex/documents_length.txt').read())
# AIindex = SearchEngine([open('../indices/AIindex/BLOCK262'), open('../indices/AIindex/BLOCK263'), open('../indices/AIindex/BLOCK264'), open('../indices/AIindex/BLOCK265')], ai_index_documents_length)
# document_frequencies = AIindex.document_frequencies()
# with open('../indices/AIindex/document_frequencies.txt', 'w') as file:
#     file.write(str(document_frequencies))

# Document frequencies previously written from the AIindex (see the
# commented-out lines above).
document_frequencies = eval(open('../indices/AIindex/document_frequencies.txt').read())
concordia_ai_documents_length = eval(open('../indices/ConcordiaAI/documents_length.txt').read())
# Engine over the five ConcordiaAI index block files.
ConcordiaAI = SearchEngine([open('../indices/ConcordiaAI/BLOCK119'), open('../indices/ConcordiaAI/BLOCK120'), open('../indices/ConcordiaAI/BLOCK121'), open('../indices/ConcordiaAI/BLOCK122'), open('../indices/ConcordiaAI/BLOCK123')], concordia_ai_documents_length)

# Runs until interrupted: read k and a query, then print the rankings.
while True:
    try:
        k = int(input('Enter number of returns:'))
        query = Query(input('Enter query:'))
    except Exception as ex:
        # Bad input: show the first exception argument and ask again.
        print(ex.args[0])
        continue
    results = ConcordiaAI.search(query, document_frequencies, k)
    # Note the print order: results[0], then results[2], then results[1].
    print('-----BM25 Ranking-----\n')
    print(results[0])
    print('-----BM25 Ranking with AITopics Document Frequencies-----\n')
    print(results[2])
    print('\n-----Tf-idf Ranking-----\n')
    print(results[1])
    # NOTE(review): no result is printed after this last header in the
    # visible code -- the script may continue outside this view.
    print('\n-----Tf-idf Ranking with AITopics Document Frequencies-----\n')
#main.py #Gaonkar, Vijay #vrgaonkar from __future__ import print_function from SearchEngine import SearchEngine search_engine = SearchEngine() results = [] choice = 0 while choice != 5: print("\n\t\t\t ############################### GAVELTON LIBRARY ############################### \n" "\n Welcome! Looking for something? I can help!") search_str = raw_input("\n Enter a word or a phrase to get started: ") print("\n How do you wanna search?\n" " 1. Search by call number\n" " 2. Search by title\n" " 3. Search by subject\n" " 4. Search by other\n" " 5. Quit\n") choice = input(" Your Choice: ") if choice == 1: results = search_engine.search_by_call_no(search_str) if len(results) > 0: print("\n\t\t\t ************************** Search Results ************************** ") for item in results: item.display()
def __init__(self, scrambledMatrix, goalMatrix):
    """Initialise by passing both matrices unchanged to ``SearchEngine.__init__``."""
    SearchEngine.__init__(self, scrambledMatrix, goalMatrix)
from SearchEngine import SearchEngine

# Create an engine with its default arguments and run one wiki lookup for
# 'hydrogen' in 'instance' mode; the call's return value is not used.
search_engine = SearchEngine()
search_engine.find_matches_from_wiki('hydrogen', mode='instance')
# NOTE(review): the statements up to ``return html`` are the tail of a view
# function whose header lies outside this view. They copy the entries of
# ``odict`` named by ``intersetion`` (keys UTF-8 encoded) into ``output``
# and render search.html with or without results.
output = {}
for sub_intersetion in intersetion:
    sub = sub_intersetion.encode("utf-8")
    output[sub] = odict[sub]
if output:
    rank = sort_by_value(output)
    html = render_template("search.html", ol=output, rank=rank, key=keywords)
else:
    html = render_template("search.html", ol=None, key=False)
#return jsonify({'result': rtn})
return html


def sort_by_value(d):
    """Return the keys of ``d`` ordered by value, largest first.

    The sort is over [value, key] pairs in reverse order, so keys with
    equal values come out in descending key order.
    """
    items = d.items()
    # Swap to [value, key] so the default list ordering sorts by value.
    backitems = [[v[1], v[0]] for v in items]
    backitems.sort(reverse=True)
    return [backitems[i][1] for i in range(0, len(backitems))]


# Start-up: build the tries, then run the app in debug mode on port 8886.
if __name__ == '__main__':
    se = SearchEngine()
    se.buildTries()
    app.run(debug=True, port=8886)
print("\nAverages:") print("\tP@10: {:.5f}".format(avgPAtTen)) print("\tinterpolated MAP: {:.5f}".format(avgMAP)) print("\ttime: {:.5f} s".format(avgTime)) print("\tinterpolated recall points (precision, recall):") for pair in avgRecallPoints: p, r = pair print("\t({:.5f}, {:.5f}),".format(p, r)) if __name__ == '__main__': parser = createParser() args = parser.parse_args() eng = SearchEngine() if args.function == CREATE_INDEX_CMD: collectionFolder = args.path start = getTime() menuCreateIndex(eng, collectionFolder) print("It took {} s to create and save the index.".format(getTime() - start)) elif args.function == INTERACTIVE_QUERY_CMD: rankingSize = args.rSize menuInteractiveQuery(eng, rankingSize) elif args.function == PROCESS_QUERY_FILE_CMD: queryFile = args.path rankingSize = args.rSize
def main():
    """Menu loop: run the chosen search, show the hits and their count.

    The menu is shown again until option 5 is chosen. An option outside
    1-4 prints a hint and reports zero matches.
    """
    se = SearchEngine()
    # Option number -> (prompt text, SearchEngine method name).
    actions = {
        1: ("Enter Call Number: ", "search_by_call_no"),
        2: ("Enter Subject: ", "search_by_subjects"),
        3: ("Enter Title: ", "search_by_title"),
        4: ("Enter Other : ", "search_by_other"),
    }

    option = int(displayMenu())
    while option != 5:
        results = list()
        if option in actions:
            prompt, method_name = actions[option]
            term = input(prompt)
            results = getattr(se, method_name)(term)
            se.display(results)
        else:
            print('\n')
            print("Enter Correct option:")
        print('\n')
        print("Total No. of Matching Result: ", len(results))
        print('\n')
        option = int(displayMenu())
def metaAuthors(request):
    """Return ``getAuthors`` for the ``q`` GET parameter as a JSON response."""
    engine = SearchEngine()
    query = request.GET.get('q', '')
    payload = json.dumps(engine.getAuthors(query))
    return HttpResponse(payload, content_type="application/json")
action="store_true", help="Also use explanations of idioms") parser.add_argument('--sound_like', type=str, help="Specify 'sounds-like' word") parser.add_argument('--means', type=str, help="Specify 'meaning' word") parser.add_argument('--verbose', action="store_true", help="Print additional debugging info") parser.add_argument('--func', action="store_true", help="Consider function words for replacement.") cmd_args = parser.parse_args() print("Hello and welcome to the pun aid!") se = SearchEngine(1000, cmd_args.vecs, cmd_args.combo) model = se.word_vectors print(cmd_args.ortho) if cmd_args.ortho and cmd_args.rhyme: print( 'Looking for both orthographic matches and rhyming matches is not possible. Please choose one!' ) sys.exit() while True: query = input( "Start search: \"Sounds like x\" \"Has to do with y\":\n> ") query = query.split() sounds_like = query[0]
wrong_ids[model_name].append( (true_sourse, found_sourse)) output_answer.append( dict(true_sourse=true_sourse, sourse=found_sourse, model=model_name, title=answer[model_name][0]['title'], similarity=answer[model_name][0]['similarity'])) file.write(json.dumps(output_answer, indent=2, ensure_ascii=False)) file.write(json.dumps(wrong_ids, indent=2, ensure_ascii=False)) if __name__ == "__main__": log_of_result = [] if len(DBController.get_all_documents()) == 0: print('Seeding database...') DatabaseSeeder.seed() lsi = Storage.load_model('out/lsi', 'phyge', 'lsi') lda = Storage.load_model('out/lda', 'phyge', 'lda') d2v = Storage.load_model('out/d2v', 'phyge', 'd2v') fast_text = Storage.load_model('out/fast_text', 'phyge', 'ft') search_engine = SearchEngine(models=[fast_text, d2v, lda, lsi]) test_path = os.path.join(PhyVariables.testsDir, 'test_' + str(PhyVariables.queriesId)) run_search(os.path.join(test_path, PhyVariables.queriesFileName), os.path.join(test_path, PhyVariables.answersFileName), 1) # run_search('Resources/pdf_articles.json','Resources/answers.json',1)
class ManipulatePyExercise:
    """Inspect and build a 'true_or_false_question' exercise folder.

    All directory navigation and existence checks go through
    ``self.searchEngine``.

    Fixes in this version:
    * ``seeIfHasVersions`` now always steps back out of the question
      folder; it used to stay inside it whenever no 'version_1' folder
      was found;
    * ``getVersionsNames`` and ``getVersionImgs`` step back out even when
      an error occurs half-way;
    * image files are read through ``with`` so their handles are closed;
    * ``generateVersions`` no longer uses a bare ``except:``.
    """

    def __init__(self):
        self.theFolderName = 'true_or_false_question'
        # NOTE: ``self.foldersNames`` is not defined (it was commented out as
        # ['qom_questions_transformer','true_or_false_question']), so
        # isFolderExercise cannot run as written.
        self.filesNames = [
            'true_or_false_question.tex', 'program.py',
            'util_make_random_versions.py'
        ]
        self.questionpdf = 'true_or_false_question.pdf'
        self.questionpng = 'true_or_false_question.png'
        # [prefix, suffix for the true variant, suffix for the false variant]
        self.linesNamepdf = ['answer_', '_true.pdf', '_false.pdf']
        self.linesNamepng = ['answer_', '_true.png', '_false.png']
        self.questionFolder = 'question'
        self.versionFolder = 'version_'
        self.directory = ''
        self.filePath = ''
        self.searchEngine = SearchEngine()
        # difference finder
        self.pyFilesNames = ['full_program.py', 'answers_program.py']
        self.pyProgram = 'program.py'
        self.pyPrograms = ['program', '.py']
        self.texFilesNames = ['true_or_false_question.tex']
        self.texAnswerNames = ['answer_', '_false.tex', '_true.tex']
        self.folders = []
        self.files = []

    def seeIfExercise(self, path):
        """Point the search engine at ``path`` and report whether it holds
        every file listed in ``self.filesNames``."""
        self.searchEngine.setDirectory(path)
        files = self.searchEngine.getAllFiles()
        return self.isFilesExercise(files)

    def isFolderExercise(self, folders):
        """Not in use. Check whether every name in ``self.foldersNames`` is
        a prefix of some entry of ``folders``.

        NOTE(review): raises AttributeError as written, because
        ``self.foldersNames`` is never assigned in this class.
        """
        counter = 0
        for folder in folders:
            for name in self.foldersNames:
                if folder[:len(name)] == name:
                    counter += 1
                    if name == self.theFolderName:
                        # Remember the exercise folder for later use.
                        self.filePath = folder
        return counter == len(self.foldersNames)

    def isFilesExercise(self, files):
        """Return True when the number of (file, required name) matches
        equals the number of required names in ``self.filesNames``."""
        counter = sum(
            1 for file in files for name in self.filesNames if file == name)
        return counter == len(self.filesNames)

    def getExerciseImg(self):
        """Return the PNG bytes of the question statement.

        The PNG is created from the PDF when it does not exist yet. Prints
        a message and returns None when the PDF is missing.
        """
        if not self.searchEngine.fileExists(self.questionpdf):
            print('Não foi possivél encontrar o pdf do enunciado')
            return None
        if not self.searchEngine.fileExists(self.questionpng):
            pages = convert_from_path(self.questionpdf, 500)
            for page in pages:
                page.save(self.questionpng, 'PNG')
        with open(self.questionpng, 'rb') as file:
            return file.read()

    def getExerciseLinesImg(self):
        """Return the PNG bytes of every answer line, true variant then
        false variant, for answers 1, 2, ... until a PDF pair is missing.

        Prints a message and returns None when not even the first pair
        exists.
        """
        lines = []
        i = 0
        while True:
            i = i + 1
            name_true = self.linesNamepdf[0] + str(i) + self.linesNamepdf[1]
            name_false = self.linesNamepdf[0] + str(i) + self.linesNamepdf[2]
            have_pdfs = (self.searchEngine.fileExists(name_true)
                         and self.searchEngine.fileExists(name_false))
            if not have_pdfs:
                if i == 1:
                    print('Não foi possivél encontrar os pdfs das alineas')
                    return None
                break

            png_true = self.linesNamepng[0] + str(i) + self.linesNamepng[1]
            png_false = self.linesNamepng[0] + str(i) + self.linesNamepng[2]
            # Both PNGs are (re)created when the 'true' one is missing.
            if not self.searchEngine.fileExists(png_true):
                pages_true = convert_from_path(name_true, 500)
                pages_false = convert_from_path(name_false, 500)
                for page in pages_true:
                    page.save(png_true, 'PNG')
                for page in pages_false:
                    page.save(png_false, 'PNG')

            with open(png_true, 'rb') as file_true:
                lines.append(file_true.read())
            with open(png_false, 'rb') as file_false:
                lines.append(file_false.read())
        return lines

    def getExerciseName(self):
        """Return ``self.searchEngine.getLastFolder()``."""
        return self.searchEngine.getLastFolder()

    def getPyFilesNames(self):
        """List the existing Python files: program.py, program2.py,
        program3.py, ... and the names in ``self.pyFilesNames``."""
        files = []
        if self.searchEngine.fileExists(self.pyProgram):
            files.append(self.pyProgram)
        i = 2
        while self.searchEngine.fileExists(self.pyPrograms[0] + str(i) +
                                           self.pyPrograms[1]):
            files.append(self.pyPrograms[0] + str(i) + self.pyPrograms[1])
            i = i + 1
        for file in self.pyFilesNames:
            if self.searchEngine.fileExists(file):
                files.append(file)
        return files

    def getTexFilesNames(self):
        """List the existing TeX files: the question file and, for each
        answer number whose '_false.tex' file exists, both answer file
        names ('_false.tex' then '_true.tex')."""
        files = []
        for file in self.texFilesNames:
            if self.searchEngine.fileExists(file):
                files.append(file)
        i = 1
        while self.searchEngine.fileExists(self.texAnswerNames[0] + str(i) +
                                           self.texAnswerNames[1]):
            files.append(self.texAnswerNames[0] + str(i) +
                         self.texAnswerNames[1])
            files.append(self.texAnswerNames[0] + str(i) +
                         self.texAnswerNames[2])
            i = i + 1
        return files

    def seeIfHasVersions(self):
        """Return True when the question folder contains 'version_1'.

        The search engine is always returned to the directory it started
        in.
        """
        if not self.searchEngine.folderExists(self.questionFolder):
            return False
        self.searchEngine.goFoward(self.questionFolder)
        try:
            return bool(
                self.searchEngine.folderExists(self.versionFolder + '1'))
        finally:
            # Step back out on both outcomes (the False path used to skip this).
            self.searchEngine.goBack()

    def getVersionsNames(self):
        """Return 'version_1', 'version_2', ... for each consecutive
        version folder found inside the question folder."""
        i = 1
        names = []
        self.searchEngine.goFoward(self.questionFolder)
        try:
            while self.searchEngine.folderExists(self.versionFolder + str(i)):
                names.append(self.versionFolder + str(i))
                i = i + 1
        finally:
            self.searchEngine.goBack()
        return names

    def getVersionImgs(self, version_number):
        """Return [statement image, answer line images] for one version.

        ``version_number`` is concatenated to 'version_' and must therefore
        be a string.
        """
        imgs = []
        self.searchEngine.goFoward(self.questionFolder)
        try:
            self.searchEngine.goFoward(self.versionFolder + version_number)
            try:
                imgs.append(self.getExerciseImg())
                imgs.append(self.getExerciseLinesImg())
            finally:
                self.searchEngine.goBack()
        finally:
            self.searchEngine.goBack()
        return imgs

    def getPyFileDifferences(self, fileName):
        """Return ``pythonHTML(fileName)``."""
        return pythonHTML(fileName)

    def getTexFileDifference(self, fileName):
        """Return ``questionHTML(fileName)``."""
        return questionHTML(fileName)

    def changeNumberOfVersions(self, number):
        """Rewrite make_random_versions.py so that every line containing
        'number_of_versions = ' becomes 'number_of_versions = <number>'."""
        with open("make_random_versions.py", "r") as f:
            lines = f.readlines()
        with open("make_random_versions.py", "w") as f:
            for line in lines:
                if "number_of_versions = " in line.strip("\n"):
                    f.write('number_of_versions = ' + str(number) + '\n')
                else:
                    f.write(line)

    def generateVersions(self):
        """Run make_random_versions.py and then make_pdfs.py; any error is
        reported with a printed message instead of being raised."""
        try:
            self.searchEngine.runFile('make_random_versions.py')
            self.searchEngine.runFile('make_pdfs.py')
        except Exception:
            print('Ocorreu uma excepção na geração de versões')
class Database:
    """Pickle-backed store for a SearchEngine and its per-document entities.

    NOTE(review): the database file is read with pickle.load, which can
    execute arbitrary code -- only open database files from a trusted
    source.
    """

    def __init__(self, file = ".database.pkl"):
        """
        Initializes a new Database

        Loads the engine and entities from the database file. When the
        file is empty (EOFError) or does not exist yet (FileNotFoundError),
        starts from an empty engine and entity dict.

        Fix: a missing database file used to raise FileNotFoundError, so a
        Database could not be created before the file existed.

        Parameters
        ----------
        file: String
            path to database file (.pkl)

        Returns
        -------
        None
        """
        self.file = file  # database file
        try:
            self.engine, self.entities = self.getDatabase()
            print("Loaded from database", flush = True)
        except (EOFError, FileNotFoundError):
            self.engine = SearchEngine()
            self.entities = {}

    def __repr__(self):
        return "{}".format(self.engine.raw_text.keys())

    def addDocument(self, text_id, text):
        """
        Adds a document to the database and saves it

        Parameters
        ----------
        text_id:
            id of document to add
        text:
            rawtext

        Returns
        -------
        None
        """
        #TODO: Finish
        self.engine.add(text_id, text)
        entities_list = self.engine.getEntities(text)
        self.entities[text_id] = entities_list
        # Register the document under each of its entities.
        for entity in entities_list:
            self.engine.inverted_index[entity].add(text_id)
        self.updateDatabase()

    def updateDatabase(self):
        """
        Writes self.engine and self.entities to database file

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        with open(self.file, "wb") as f:
            pickle.dump((self.engine, self.entities), f)

    def getDatabase(self):
        """
        Retrieves engine and entities from saved database file

        Parameters
        ----------
        None

        Returns
        -------
        Tuple of (engine, entities) as stored by updateDatabase
        """
        with open(self.file, "rb") as f:
            engine, entities = pickle.load(f)
        return (engine, entities)

    def removeDocument(self, text_id):
        """
        Removes a document from the database and saves it

        Parameters
        ----------
        text_id: string
            text_id of document to remove

        Returns
        -------
        None
        """
        self.engine.remove(text_id)
        # Drop the document from the index entry of each of its entities.
        for entity in self.entities[text_id]:
            if text_id in self.engine.inverted_index[entity]:
                self.engine.inverted_index[entity].remove(text_id)
        del self.entities[text_id]
        self.updateDatabase()

    def clear(self):
        """
        Clears the database

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        self.engine = SearchEngine()
        self.entities = {}
        self.updateDatabase()
from flask import Flask, request from flask_cors import CORS, cross_origin from SearchEngine import SearchEngine from QueryResponse import QueryResponse, from_query_result import os # --------------------------------| # In order to execute run: | # `python server.py` | # --------------------------------| engine = SearchEngine() app = Flask(__name__, static_url_path="", static_folder="resources/web/static") app.config["CORS_HEADERS"] = "Content-Type" CORS(app) @app.route("/") def root(): return app.send_static_file("index.html") @app.route("/query", methods=["GET"]) def query(): query_text = request.args["query"] svd_param = request.args.get("svd") k_largest = request.args.get("k") k = 20 if k_largest:
def __init__(self):
    """Create the ``SearchEngine`` (default arguments) stored in ``self.search_engine``."""
    self.search_engine = SearchEngine()
def setUp(self):
    """Build the engine under test and store it in ``self.se``."""
    # NOTE(review): presumably 'sum' is the initial combine mode and the
    # .txt file holds the word vectors -- confirm against SearchEngine.
    self.se = SearchEngine(1000, 'word2vec.glove.6B.300d.txt', 'sum', forcebin=True)
#!/usr/bin/python from __future__ import print_function from SearchEngine import SearchEngine import sys choice = 0 se = SearchEngine() if len(sys.argv) is not None: argument = str(sys.argv) if "book" in argument: se.parse_book() if "film" in argument: se.parse_film() if "period" in argument: se.parse_periodic() if "video" in argument: se.parse_video() while choice!= 5: print("__________________Welcome_________________") print("1.Search by Call Number") print("2.Search by Title") print("3.Search by Subject") print("4.Search by Others") print("5.Exit") print("__________________________________________") choice = input("Please enter your choice:") if choice == 1: