def test_DocumentDataBase(self):
    testDb = SearchEngine.DocumentDataBase()
    testDocument = SearchEngine.Document("testID", "testTitle", "testBody")
    testDocument2 = SearchEngine.Document("testID2", "testTitle2", "testBody2")
    testDb.AddFile(testDocument)
    testDb.AddFile(testDocument2)
    self.assertTrue(testDocument.id in testDb.docDataBase)
    with self.assertRaises(ValueError):
        testDb.AddFile(testDocument)
    res = testDb.GetFile(testDocument.id)
    self.assertEqual(res, testDocument.title)

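# A minimal sketch of the Document and DocumentDataBase classes implied by the
# test above. This is an assumption reconstructed from the assertions (AddFile
# rejects a duplicate id with ValueError, GetFile returns the stored title),
# not the project's actual implementation.
class Document:
    def __init__(self, id, title, body):
        self.id = id
        self.title = title
        self.body = body


class DocumentDataBase:
    def __init__(self):
        self.docDataBase = {}

    def AddFile(self, document):
        # Adding the same document twice is an error, as the test expects.
        if document.id in self.docDataBase:
            raise ValueError("Duplicate document id: %s" % document.id)
        self.docDataBase[document.id] = document

    def GetFile(self, doc_id):
        # The test compares the result against the document's title.
        return self.docDataBase[doc_id].title
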
def grep(text, io=None, flist=None):
    root = text._root()
    engine = SearchEngine.get(root)
    if not hasattr(engine, "_grepdialog"):
        engine._grepdialog = GrepDialog(root, engine, flist)
    dialog = engine._grepdialog
    dialog.open(io)

def setUp(cls) -> None:
    try:
        cls.test_df = pd.read_csv("../TestData/so2_chicago_data.txt")
    except FileNotFoundError:
        print("Failed to open ../TestData/so2_chicago_data.txt")
    cls.engine = SearchEngine.searchEngine()

def do_replace(self):
    prog = self.engine.getprog()
    if not prog:
        return False
    text = self.text
    try:
        first = pos = text.index("sel.first")
        last = text.index("sel.last")
    except TclError:
        pos = None
    if not pos:
        first = last = pos = text.index("insert")
    line, col = SearchEngine.get_line_col(pos)
    chars = text.get("%d.0" % line, "%d.0" % (line + 1))
    m = prog.match(chars, col)
    if not m:  # no match at the insertion point; nothing to replace
        return False
    new = m.expand(self.replvar.get())
    text.mark_set("insert", first)
    text.undo_block_start()
    if m.group():
        text.delete(first, last)
    if new:
        text.insert(first, new)
    text.undo_block_stop()
    self.show_hit(first, text.index("insert"))
    self.ok = 0
    return True

def replace(text):
    root = text._root()
    engine = SearchEngine.get(root)
    if not hasattr(engine, "_replacedialog"):
        engine._replacedialog = ReplaceDialog(root, engine)
    dialog = engine._replacedialog
    dialog.open(text)

def grep(text, io=None, flist=None):
    root = text._root()
    engine = SearchEngine.get(root)
    if not hasattr(engine, "_grepdialog"):
        engine._grepdialog = GrepDialog(root, engine, flist)
    dialog = engine._grepdialog
    searchphrase = text.get("sel.first", "sel.last")
    dialog.open(text, searchphrase, io)

def test_Index(self):
    testDb = SearchEngine.DocumentDataBase()
    testIndex = SearchEngine.Index(testDb)
    testDocument = SearchEngine.Document(
        "testID", "testTitle", "testBody This is a test file without punctuation")
    testDocument2 = SearchEngine.Document(
        "testID2", "testTitle2", "testBody2 this is a test file with punctuation.")
    testIndex.IndexDocument(testDocument)
    self.assertTrue(testDocument.title.lower() in testIndex.index.keys())
    self.assertTrue(testDocument.id in testIndex.index[testDocument.title.lower()])
    for i in testDocument.body.split():
        self.assertTrue(i.lower() in testIndex.index.keys())
        self.assertTrue(testDocument.id in testIndex.index[i.lower()])
    testIndex.IndexDocument(testDocument2)
    self.assertTrue(testDocument2.title.lower() in testIndex.index.keys())
    self.assertTrue(testDocument2.id in testIndex.index[testDocument2.title.lower()])
    for i in testDocument2.body.split():
        if i == "punctuation.":
            i = "punctuation"
        self.assertTrue(i.lower() in testIndex.index.keys())
        self.assertTrue(testDocument2.id in testIndex.index[i.lower()])
    res = testIndex.LookUp("This")
    self.assertEqual(res, {'testID', 'testID2'})
    res = testIndex.LookUp("this")
    self.assertEqual(res, {'testID', 'testID2'})
    res = testIndex.LookUp("testTitle")
    self.assertEqual(res, {'testID'})
    res = testIndex.LookUp("punctuation")
    self.assertEqual(res, {'testID', 'testID2'})
    res = testIndex.LookUp("with")
    self.assertEqual(res, {'testID2'})
    res = testIndex.LookUp("no")
    self.assertEqual(res, [])

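# A minimal sketch of an Index consistent with the test above. It is an
# assumption inferred from the assertions (the title is indexed as one term,
# tokens are lowercased, surrounding punctuation is stripped, LookUp returns a
# set of ids on a hit and [] on a miss), not the project's actual code.
import string


class Index:
    def __init__(self, db):
        self.db = db
        self.index = {}

    def IndexDocument(self, document):
        # Index the whole title as a single term, plus every body token.
        tokens = [document.title] + document.body.split()
        for token in tokens:
            term = token.lower().strip(string.punctuation)
            self.index.setdefault(term, set()).add(document.id)

    def LookUp(self, term):
        # Empty list on a miss, matching the final assertion in the test.
        return self.index.get(term.lower(), [])
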
def result():
    value = []
    if request.method == "POST":
        word = request.form.get("word")
        word = word.lower()  # str.lower() returns a new string; keep the result
        if "*" not in word:
            value, words = SearchEngine.Search_pro(word)
            values = printja.printja(value)
            return render_template("result.html", word=word, weblink=values, words=words)
        else:
            value, words = SearchWildcard.Search_pro_wild(word)
            values = printja.printja(value)
            return render_template("result.html", word=word, weblink=values, words=words)

def get_best_triplets_by_searches(best_triplets, movie_name):
    """
    Pick the triplet whose quoted query together with the movie name yields
    the most search results.
    :param best_triplets: candidate triplets, each an indexable pair whose
        first element is the triplet string
    :param movie_name: name of the movie the triplets belong to
    :return: (best triplet, its number of search results)
    """
    max_searches = 0
    best_triplet = ""
    for i in range(len(best_triplets)):
        trip = best_triplets[i][0]
        query = "\"" + movie_name + "\" \"" + trip + "\""
        cur_searches = se.get_query_total_results(query)
        if cur_searches > max_searches:
            max_searches = cur_searches
            best_triplet = trip
    return best_triplet, max_searches

def search():
    """
    Display the search results, 10 items at a time.
    The user can browse to the previous page or the next page.
    """
    search_term = request.query.search_term
    site_name = request.query.site_name
    page_number = int(request.query.page_number or '1', 10)
    is_tag = request.query.is_tag  # Is the search_term a tag, or is it a free search?

    # If this is the first page, there's no previous page.
    if page_number == 1:
        prev_page = None
    else:
        prev_page = page_number - 1

    # Load the pointer to the site's search engine index.
    index_pointer = index_pointers[site_name]
    search_results, is_last_page = SearchEngine.get_search_results(
        index_pointer, search_term, page_number, site_name, is_tag)

    # If this is the last page of results, there's no next page.
    if is_last_page:
        next_page = None
    else:
        next_page = page_number + 1

    return template('search_results',
                    current_page_num=page_number,
                    prev_page=prev_page,
                    next_page=next_page,
                    site_name=site_name,
                    is_tag=is_tag,
                    search_term=search_term,
                    search_results=search_results)

def parse_search_table_row(row, quote_id):
    """
    Score each line of a quote by its number of search results.
    :param row: a table row where row[1] is the quote and row[2] is the movie name
    :param quote_id: id of the quote (used for error reporting)
    :return: (full quote text, best-scoring line, its number of search results)
    """
    quote = row[1]
    movie_name = row[2]
    quote = quote_to_list(quote)
    quote_str = ""
    max_searches = 0
    max_line = ""
    # Go over all lines to see what score they get, and whether we are done.
    for line in quote:
        if line == "":
            continue
        # Check if we reached the end of the quote.
        m = INTERESTING_REG_PATTERN.match(line)
        if m:
            break  # reached the end of the quote
        quote_str += line + "\n"
        # Remove the character name from the quote line.
        idx = line.find(":") + 1
        query = "\"" + movie_name + "\" \"" + line[idx:] + "\""
        try:
            cur_searches = se.get_query_total_results(query)
        except Exception:
            print("the id we got stuck in is " + str(quote_id))
            raise
        if cur_searches > max_searches:
            max_searches = cur_searches
            max_line = line
    return (quote_str, max_line, max_searches)

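# Hypothetical call to parse_search_table_row above, assuming the row layout
# it expects (row[1] = raw quote, row[2] = movie name). The values are made up,
# and quote_to_list, INTERESTING_REG_PATTERN, and se must exist in the module.
row = (7, "JACK: I'm the king of the world!", "Titanic")
quote_text, best_line, searches = parse_search_table_row(row, quote_id=7)
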
def test_one_document_type(self):
    parser = iE.create_parser()
    arg = parser.parse_args(['dummy query', '-text'])
    self.assertEqual(arg.document_types, ['text'])

import string
def _setup(text):
    root = text._root()
    engine = SearchEngine.get(root)
    if not hasattr(engine, "_searchdialog"):
        engine._searchdialog = SearchDialog(root, engine)
    return engine._searchdialog

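# The grep/replace/search helpers in this collection all share one pattern:
# one SearchEngine per Tk root, with each dialog cached as an attribute on
# that engine so repeated invocations reuse the same window per root. A
# minimal standalone sketch of the pattern; DemoDialog and the attribute name
# are illustrative, not part of any of the modules above.
class DemoDialog:
    def __init__(self, root, engine):
        self.root = root
        self.engine = engine


def get_demo_dialog(root, engine):
    # Cache the dialog on the engine, which itself is one-per-root.
    if not hasattr(engine, "_demodialog"):
        engine._demodialog = DemoDialog(root, engine)
    return engine._demodialog
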
orders = [0, 1, 2]
bmas = [["diamond", "/home/felsamps/Tcc/cache-mvc/inDiamondTZ.txt"],
        ["square", "/home/felsamps/Tcc/cache-mvc/inSquareTZ.txt"]]
sets = [1, 2, 3, 4, 5, 6, 7, 8]
blocks = [[16, 16], [20, 20], [40, 40], [32, 24], [40, 30]]
caches = [[2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9],
          [10, 10], [3, 4], [4, 5], [5, 6], [3, 5]]
stats = []

if __name__ == "__main__":
    i = 1
    total = len(orders) * len(bmas) * len(sets) * len(blocks) * len(caches)
    for order in orders:
        for bma in bmas:
            for set in sets:
                for block in blocks:
                    for cache in caches:
                        config = ConfigFile("fake")
                        # NOTE: w and h (frame width/height) are not defined in
                        # this snippet; they must come from elsewhere in the module.
                        config.initConfigs(w, h, block[0], block[1], set,
                                           cache[0] * cache[1], cache[0],
                                           cache[1], order, bma[0])
                        trace = TraceFile(bma[1])
                        print "Running configuration", i, "of", total
                        engine = SearchEngine(trace, config)
                        engine.process()
                        stats.append(engine.getStats())
                        i += 1
    fp = open("/home/felsamps/Tcc/cache-mvc/results/results.csv", "w")
    stats[0].printHeader(fp)
    for result in stats:
        # @type result Stats
        result.reportFile(fp)

from flask import Flask, render_template, url_for, request
import SearchEngine

app = Flask(__name__)
indexMarkers, docIDMarkers, stopWords, stemmer = SearchEngine.preliminary()


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/result', methods=['POST', 'GET'])
def result():
    page = request.args.get('page', 1, type=int)
    if page > 0:
        page -= 1
    if request.method == 'POST':
        result = request.form
        query = result['query']
        if 'resultAmount' not in result.keys():
            resultAmount = 15
        else:
            resultAmount = int(result['resultAmount'])
    else:
        query = request.args.get('query', '', type=str)
        resultAmount = request.args.get('resultAmount', 0, type=int)

    # Run the search engine.
    search = SearchEngine.searchEngine(query, 200, indexMarkers, docIDMarkers,
                                       stopWords, stemmer)
    if search == []:
        return render_template("result.html", valid=False)
    else:
        time = round(search[0], 2)
        infoList = search[1]
        totalResultAmount = search[2]
        if totalResultAmount < resultAmount:
            resultAmount = totalResultAmount
        # Pagination (pages are 1-based for the user, 0-based internally).
        offset = page * resultAmount
        infoList = infoList[offset:offset + resultAmount]
        page += 1
        return render_template("result.html", valid=True, page=page, query=query,
                               resultAmount=resultAmount,
                               totalResultAmount=totalResultAmount,
                               infoList=infoList, time=time)

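# Worked example of the pagination arithmetic in result() above, with
# hypothetical numbers: the route takes a 1-based page number, shifts it to a
# 0-based index, slices the result list, then shifts back for the template.
page = 3                      # user requested page 3 (1-based)
page -= 1                     # 0-based page index: 2
resultAmount = 15             # results per page
offset = page * resultAmount  # 30: skip the first two pages
# infoList[offset:offset + resultAmount] -> items 30..44, i.e. the third page
page += 1                     # back to 1-based for the template
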
def test_steam_miss(test_list):
    test_num = 200
    auc_num = 0
    alph = random.sample('qwertyuiopasdfghjklzxcvbnm', 1)
    for ids in test_list.keys():
        game_name = test_list[ids]
        num = random.randint(0, len(game_name) - 1)
        change = game_name[num]
        new_name = game_name.replace(change, ''.join(alph), 1)
        results = get_search_results(
            "https://store.steampowered.com/search/?term=", new_name)
        if results is None:
            continue
        if len(results) > 10:
            results = results[:10]
        for game in results:
            if str(game['game_id']) == str(ids):
                auc_num += 1
                break
    print('test_num:', test_num, 'accuracy', auc_num / test_num)


if __name__ == "__main__":
    CACHE_DICT = open_cache()
    PS = PorterStemmer()
    search_engine = SearchEngine(config, word_tokenize, PS, isStemming=False)
    # test_list = search_engine.test_auc_zh_steam()
    # test_steam_miss(test_list)
    app.run(debug=True)

def test_no_document_types(self):
    parser = iE.create_parser()
    arg = parser.parse_args(['dummy query'])
    self.assertEqual(arg.document_types, None)

parser.add_argument("--mode", choices=["train","eval","repr_code","search"], default='train', help="The mode to run. The `train` mode trains a model;" " the `eval` mode evaluate models in a test set " " The `repr_code/repr_desc` mode computes vectors" " for a code snippet or a natural language description with a trained model.") parser.add_argument("--gen", type=int, default='3', help="Number of GA generation") parser.add_argument("--chunk_size", type=int, default='20', help="Number of inputs") parser.add_argument("--mutation_rate", type=float, default='0.05', help="Mutation Rate") parser.add_argument("--verbose",action="store_true", default=True, help="Be verbose") return parser.parse_args() if __name__ == '__main__': args = parse_args() config = getattr(configs, 'config_'+args.model)() engine = SearchEngine.SearchEngine(args, config) ##### Define model ###### logger.info('Build Model') #tf.compat.v1.global_variables_initializer() model = getattr(models, args.model)(config) # initialize the model model.build() model.summary(export_path = "./output/{}/".format(args.model)) optimizer = config.get('training_params', dict()).get('optimizer', 'adam') model.compile(optimizer=optimizer) data_path = args.data_path+args.dataset+'/'
import sys

from PySide2.QtCore import *
from PySide2.QtGui import *
from PySide2.QtWidgets import *
from vmmpy import *

sys.path.append("RemoteMemoryScanner")
from SearchEngine import *
from UserInterface import *

if __name__ == "__main__":
    app = QApplication(sys.argv)
    search_engine = SearchEngine()
    user_interface = UserInterface(search_engine)
    user_interface.main_window.show()
    sys.exit(app.exec_())

__author__="felsamps" __date__ ="$16/07/2010 15:27:49$" import sys from Cache import * from ConfigFile import * from MMU import * from TraceFile import * from SearchEngine import * if __name__ == "__main__": configFile = ConfigFile(sys.argv[1]) configFile.parseFile() traceFile = TraceFile(sys.argv[2]) me = SearchEngine(traceFile, configFile) me.process()
def test_query(self):
    parser = iE.create_parser()
    arg = parser.parse_args(['dummy query'])
    self.assertEqual(arg.query, 'dummy query')

from Tkinter import *
import pywikibot
import RevisionPuller as RP
import SearchEngine as SE
import PageProcessor as PP

engine = SE.SearchEngine()
processor = PP.PageProcessor()


def get_readable_text_of_old_revision(page_title: str, rev_id: int):
    """
    Returns a string containing a "readable" version of a revision
    :param page_title: A string of the page title
    :param rev_id: The revision number of the desired revision
    :return: A string of the revision's readable text
    """
    page = engine.search(page_title, 1, "nearmatch")[0]
    return processor.getReadableText(RP.get_text_of_old_revision(page, rev_id))


def get_revisions(page_title: str, recent_to_oldest: bool = True, num_revisions=None,
                  start_time: pywikibot.Timestamp = None,
                  end_time: pywikibot.Timestamp = None):
    """
    Returns the last (num_revisions) revisions from a given Wikipedia page
    If all revisions are desired use: get_latest_revisions(page)
    :param page_title: A string containing the title of the desired page
    :param recent_to_oldest: Set to false if we want the revisions in order of
        oldest to most recent
    :param num_revisions: The number of revisions to be grabbed (set to an
        integer to set limit to number of revisions grabbed)

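# Hypothetical usage of get_readable_text_of_old_revision above; the page
# title and revision id are made-up values, and a network connection plus a
# configured pywikibot site are assumed.
if __name__ == "__main__":
    text = get_readable_text_of_old_revision("Python (programming language)", 123456789)
    print(text[:200])
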
class SearchGUI(wx.Frame):

    def __init__(self, *args, **kw):
        super(SearchGUI, self).__init__(*args, **kw)
        self.InitUI()
        # Flags indicating checkbox states.
        self.enrichFlag = False
        self.SVDFlag = False
        self.metric = COSINE
        try:
            self.searchEng = SearchEngine()
        except Exception as e:
            self.errorMessage("Error: Feature vectors have not been created!")

    def InitUI(self):
        '''
        Defining all the widgets.
        '''
        self.panel = wx.Panel(self)
        # Label indicating query.
        self.label = wx.StaticText(self.panel,
                                   label="What do you want to search for: ",
                                   size=(200, -1))
        # Text box for the query.
        self.queryBox = wx.TextCtrl(self.panel, size=(190, -1))
        self.checkPanel = wx.Panel(self.panel)
        # Check box for query enrichment.
        self.enrichCheck = wx.CheckBox(self.checkPanel, label='Enrich Query', pos=(20, 20))
        self.enrichCheck.SetValue(False)
        self.enrichCheck.Bind(wx.EVT_CHECKBOX, self.changeEnrichFlag)
        # Check box for SVD.
        self.svdCheck = wx.CheckBox(self.checkPanel, label='SVD', pos=(20, 40))
        self.svdCheck.SetValue(False)
        self.svdCheck.Bind(wx.EVT_CHECKBOX, self.changeSVDFlag)
        # Radio buttons for similarity metrics.
        self.rbPanel = wx.Panel(self.panel)
        self.rb1 = wx.RadioButton(self.rbPanel, label='Cosine', pos=(10, 10), style=wx.RB_GROUP)
        self.rb2 = wx.RadioButton(self.rbPanel, label='Norm', pos=(10, 30))
        self.rb3 = wx.RadioButton(self.rbPanel, label='Chebyshev', pos=(10, 50))
        self.rb4 = wx.RadioButton(self.rbPanel, label='Correlation', pos=(10, 70))
        self.rb1.Bind(wx.EVT_RADIOBUTTON, self.SetVal)
        self.rb2.Bind(wx.EVT_RADIOBUTTON, self.SetVal)
        self.rb3.Bind(wx.EVT_RADIOBUTTON, self.SetVal)
        self.rb4.Bind(wx.EVT_RADIOBUTTON, self.SetVal)
        # Search button.
        self.searchButton = wx.Button(self.panel, label='Search', pos=(20, 30))
        self.searchButton.Bind(wx.EVT_BUTTON, self.executeSearch)
        # Managing layout.
        self.sizer = wx.GridBagSizer(3, 2)
        self.sizer.Add(self.label, (0, 0))
        self.sizer.Add(self.queryBox, (0, 1))
        self.sizer.Add(self.checkPanel, (1, 0))
        self.sizer.Add(self.rbPanel, (1, 1))
        self.sizer.Add(self.searchButton, (2, 1))
        # Use the sizers.
        self.panel.SetSizerAndFit(self.sizer)
        self.SetSize((420, 220))
        self.SetTitle('Search Web Service')
        self.Centre()
        self.Show(True)

    def executeSearch(self, e):
        '''
        Execute the query depending on the enrich and SVD flags.
        '''
        qString = self.queryBox.GetValue()
        if not qString:
            self.errorMessage("Your query is empty!")
        else:
            if self.enrichFlag:
                qString = enrich(qString)
            try:
                start = time.time()
                if self.SVDFlag:
                    sortedFileDist = self.searchEng.svdSearch(qString, self.metric)
                else:
                    sortedFileDist = self.searchEng.normalSearch(qString, self.metric)
                end = time.time()
                htmlPresenter(sortedFileDist, HTML_DIR, qString, end - start)
                self.successMessage()
            except Exception as e:
                self.errorMessage(str(e))

    def changeSVDFlag(self, e):
        sender = e.GetEventObject()
        self.SVDFlag = sender.GetValue()
        if self.SVDFlag:
            self.rbPanel.Disable()
        else:
            self.rbPanel.Enable()

    def changeEnrichFlag(self, e):
        sender = e.GetEventObject()
        self.enrichFlag = sender.GetValue()

    def SetVal(self, e):
        state1 = self.rb1.GetValue()
        state2 = self.rb2.GetValue()
        state3 = self.rb3.GetValue()
        state4 = self.rb4.GetValue()
        if state1:
            self.metric = COSINE
        elif state2:
            self.metric = NORM
        elif state3:
            self.metric = CHEBYSHEV
        else:
            self.metric = CORRELATION

    def errorMessage(self, msg):
        dial = wx.MessageDialog(None, msg, 'Error', wx.OK | wx.ICON_ERROR)
        dial.ShowModal()

    def successMessage(self):
        dial = wx.MessageDialog(None, 'Results are generated', 'Info', wx.OK)
        dial.ShowModal()

# A simple search test page.
import cgi, os
import SearchEngine    # import the search engine module
import NeuralNetwork   # import the neural network module

print "Content-type:text/html"
print
print """
<!DOCTYPE>
<HTML>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>An easy search engine</title>
</head>
<body>"""

form = cgi.FieldStorage()
if form.has_key("query"):
    # Read the query only after confirming it exists, to avoid a KeyError.
    q = cgi.escape(form["query"].value)
    # Crawl pages and build the index.
    crawler = SearchEngine.Crawler("db_search.db")
    # crawler.create_index_tables()  # only needs to run once
    crawler.make_stopwords(stopwords_file="stopwords.txt")
    pages = ["http://book.douban.com/"]  # pre-prepared seed URLs
    crawler.crawl(pages, depth=1)
    crawler.cal_pagerank(iterations=15)
    searcher = SearchEngine.Searcher("db_search.db")
    mynet = NeuralNetwork.SearchNet("db_network.db")
    # mynet.make_tables()  # only needs to run once
    mynet.train_query(searcher.query(q))

# use default values.
try:
    ip = sys.argv[1]
    port = sys.argv[2]
    print "Starting in production Mode: {0}, {1}.".format(ip, port)
except IndexError:
    print "Starting in development Mode: localhost, 8000."
    ip = 'localhost'
    port = 8000

# The metadata_shelve holds information about available sites and their sizes,
# and the tags for each site, and their sizes.
# {site_name: (sites metadata, tags metadata)}
metadata_shelve = shelve.open('../Metadata/metadata.db', protocol=-1)
s_e_sites = []
tags_dict = {}
site_names = metadata_shelve.keys()
for site_name in site_names:
    s_e_sites.append((site_name, metadata_shelve[site_name][0]))
    tags_dict.update({site_name: metadata_shelve[site_name][1]})
metadata_shelve.close()

# Scan the /Index directory for all the available search engine indexes.
index_pointers = SearchEngine.get_all_index_pointers('../Index', site_names)

# Run the webserver.
run(host=ip, port=port, debug=True)