Example #1
0
 def test_DocumentDataBase(self):
     """Exercise AddFile and GetFile on a fresh DocumentDataBase.

     Two documents are stored; the test then checks that the first one is
     registered under its id, that storing it a second time raises
     ValueError, and that GetFile for its id equals the document's title.
     """
     database = SearchEngine.DocumentDataBase()
     first_doc = SearchEngine.Document("testID", "testTitle", "testBody")
     second_doc = SearchEngine.Document("testID2", "testTitle2", "testBody2")
     for doc in (first_doc, second_doc):
         database.AddFile(doc)

     self.assertTrue(first_doc.id in database.docDataBase)

     # A document that is already stored must be rejected.
     with self.assertRaises(ValueError):
         database.AddFile(first_doc)

     fetched = database.GetFile(first_doc.id)
     self.assertEqual(fetched, first_doc.title)
Example #2
0
def grep(text, io=None, flist=None):
    """Open the grep dialog that belongs to *text*'s root window.

    The dialog is created on first use and cached on the search engine
    as ``_grepdialog``; later calls reuse the cached instance.

    :param text: widget whose ``_root()`` identifies the engine to use
    :param io: passed through to ``dialog.open``
    :param flist: passed to ``GrepDialog`` when the dialog is first created
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_grepdialog"):
        dialog = engine._grepdialog
    else:
        dialog = GrepDialog(root, engine, flist)
        engine._grepdialog = dialog
    dialog.open(io)
    def setUp(cls) -> None:
        """Load the SO2 sample data and create the search engine under test.

        The data file is read on a best-effort basis: when it cannot be
        loaded a message is printed and ``test_df`` is left unset, but the
        engine is still created.

        NOTE(review): the first parameter is named ``cls`` although the
        function is not decorated as a classmethod in this fragment.
        """
        try:
            cls.test_df = pd.read_csv("../TestData/so2_chicago_data.txt")
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("Failed to open TestData/so2_chicago_data.txt")

        cls.engine = SearchEngine.searchEngine()
Example #4
0
 def do_replace(self):
     """Replace the current hit with the expanded replacement text.

     The hit is the selection ("sel.first".."sel.last") when there is one,
     otherwise the position of the insert cursor.  The compiled pattern is
     matched against the line containing that position, starting at its
     column.

     :return: False when there is no compiled pattern or the pattern does
              not match at that position; True after the replacement.
     """
     prog = self.engine.getprog()
     if not prog:
         return False
     text = self.text
     try:
         first = pos = text.index("sel.first")
         last = text.index("sel.last")
     except TclError:
         # No selection present.
         pos = None
     if not pos:
         first = last = pos = text.index("insert")
     line, col = SearchEngine.get_line_col(pos)
     chars = text.get("%d.0" % line, "%d.0" % (line+1))
     m = prog.match(chars, col)
     # The guard has to test the match object: match() returns None when
     # the pattern does not match, and m.expand() below would then fail.
     if not m:
         return False
     new = m.expand(self.replvar.get())
     text.mark_set("insert", first)
     # Group delete + insert so they are undone as a single step.
     text.undo_block_start()
     if m.group():
         text.delete(first, last)
     if new:
         text.insert(first, new)
     text.undo_block_stop()
     self.show_hit(first, text.index("insert"))
     self.ok = 0
     return True
Example #5
0
def grep(text, io=None, flist=None):
    """Show the grep dialog associated with the root window of *text*.

    A single ``GrepDialog`` is kept per search engine in the attribute
    ``_grepdialog``; it is built the first time this function runs.

    :param text: widget providing ``_root()``
    :param io: forwarded to ``dialog.open``
    :param flist: forwarded to ``GrepDialog`` on first creation only
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_grepdialog"):
        dialog = engine._grepdialog
    else:
        dialog = GrepDialog(root, engine, flist)
        engine._grepdialog = dialog
    dialog.open(io)
Example #6
0
def replace(text):
    """Open the replace dialog for *text*.

    The ``ReplaceDialog`` is created once per search engine, stored as
    ``_replacedialog`` and reused on subsequent calls.

    :param text: widget providing ``_root()``; also passed to ``dialog.open``
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_replacedialog"):
        dialog = engine._replacedialog
    else:
        dialog = ReplaceDialog(root, engine)
        engine._replacedialog = dialog
    dialog.open(text)
Example #7
0
 def do_replace(self):
     """Replace the current hit with the expanded replacement text.

     The hit is the selection ("sel.first".."sel.last") when there is one,
     otherwise the position of the insert cursor.  The compiled pattern is
     matched against the line containing that position, starting at its
     column.

     :return: False when there is no compiled pattern or the pattern does
              not match at that position; True after the replacement.
     """
     prog = self.engine.getprog()
     if not prog:
         return False
     text = self.text
     try:
         first = pos = text.index("sel.first")
         last = text.index("sel.last")
     except TclError:
         # No selection present.
         pos = None
     if not pos:
         first = last = pos = text.index("insert")
     line, col = SearchEngine.get_line_col(pos)
     chars = text.get("%d.0" % line, "%d.0" % (line + 1))
     m = prog.match(chars, col)
     # The guard has to test the match object: match() returns None when
     # the pattern does not match, and m.expand() below would then fail.
     if not m:
         return False
     new = m.expand(self.replvar.get())
     text.mark_set("insert", first)
     # Group delete + insert so they are undone as a single step.
     text.undo_block_start()
     if m.group():
         text.delete(first, last)
     if new:
         text.insert(first, new)
     text.undo_block_stop()
     self.show_hit(first, text.index("insert"))
     self.ok = 0
     return True
Example #8
0
def replace(text):
    """Show the replace dialog bound to the root window of *text*.

    One ``ReplaceDialog`` per search engine is cached in the attribute
    ``_replacedialog``; it is constructed on the first call.

    :param text: widget providing ``_root()``; also passed to ``dialog.open``
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_replacedialog"):
        dialog = engine._replacedialog
    else:
        dialog = ReplaceDialog(root, engine)
        engine._replacedialog = dialog
    dialog.open(text)
Example #9
0
def grep(text, io=None, flist=None):
    """Open the grep dialog pre-filled with the current selection.

    The dialog is cached on the search engine as ``_grepdialog`` and is
    created on first use.  The text between "sel.first" and "sel.last" is
    read from *text* and handed to the dialog as the search phrase.

    :param text: widget providing ``_root()`` and ``get``
    :param io: forwarded to ``dialog.open``
    :param flist: forwarded to ``GrepDialog`` on first creation only
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_grepdialog"):
        dialog = engine._grepdialog
    else:
        dialog = GrepDialog(root, engine, flist)
        engine._grepdialog = dialog
    selected = text.get("sel.first", "sel.last")
    dialog.open(text, selected, io)
Example #10
0
 def test_Index(self):
     """Index two documents and verify index contents and LookUp results.

     For each document the lower-cased title and every lower-cased body
     word must be a key of the index and map to the document id.  The
     trailing "punctuation." of the second body is expected to be indexed
     as "punctuation".
     """
     database = SearchEngine.DocumentDataBase()
     index = SearchEngine.Index(database)
     plain_doc = SearchEngine.Document(
         "testID", "testTitle",
         "testBody This is a test file without punctuation")
     punctuated_doc = SearchEngine.Document(
         "testID2", "testTitle2",
         "testBody2 this is a test file with punctuation.")

     # First document: title and every body word are indexed.
     index.IndexDocument(plain_doc)
     title_key = plain_doc.title.lower()
     self.assertTrue(title_key in index.index.keys())
     self.assertTrue(plain_doc.id in index.index[title_key])
     for token in plain_doc.body.split():
         key = token.lower()
         self.assertTrue(key in index.index.keys())
         self.assertTrue(plain_doc.id in index.index[key])

     # Second document: same checks, with the final full stop stripped.
     index.IndexDocument(punctuated_doc)
     title_key = punctuated_doc.title.lower()
     self.assertTrue(title_key in index.index.keys())
     self.assertTrue(punctuated_doc.id in index.index[title_key])
     for token in punctuated_doc.body.split():
         if token == "punctuation.":
             token = "punctuation"
         key = token.lower()
         self.assertTrue(key in index.index.keys())
         self.assertTrue(punctuated_doc.id in index.index[key])

     # Look-ups, in the same order as the individual assertions they replace.
     expectations = (
         ("This", {'testID', 'testID2'}),
         ("this", {'testID', 'testID2'}),
         ("testTitle", {'testID'}),
         ("punctuation", {'testID', 'testID2'}),
         ("with", {'testID2'}),
         ("no", []),
     )
     for term, expected in expectations:
         self.assertEqual(index.LookUp(term), expected)
Example #11
0
def result():
    """Handle a submitted search form and render ``result.html``.

    The submitted ``word`` is lower-cased and dispatched to the wildcard
    search when it contains ``*`` and to the regular search otherwise.

    :return: the rendered template for POST requests.  For any other
             request method the function falls through and returns None.
    """
    if request.method == "POST":
        # str.lower() returns a new string; the previous code called it
        # without keeping the result, so the query was never lower-cased.
        word = request.form.get("word").lower()

        if "*" in word:
            value, words = SearchWildcard.Search_pro_wild(word)
        else:
            value, words = SearchEngine.Search_pro(word)

        values = printja.printja(value)
        return render_template("result.html", word=word,
                               weblink=values, words=words)
Example #12
0
    def __init__(self, *args, **kw):
        """Build the UI, set the default search options and create the engine.

        All positional and keyword arguments are forwarded unchanged to the
        parent class.  If creating the search engine raises, an error
        message is shown and ``self.searchEng`` is left unset.
        """
        super(SearchGUI, self).__init__(*args, **kw) 
        
        # Widgets are created before the option flags below are initialised.
        self.InitUI()

        # Flags indicating checkbox states.
        self.enrichFlag = False
        self.SVDFlag = False
        # Default similarity metric.
        self.metric = COSINE

        # Engine construction may fail; report that instead of letting the
        # exception propagate out of the constructor.
        
        try:
            self.searchEng = SearchEngine()
        except Exception as e:
            self.errorMessage("Error: Feature vectors have not been created!")
Example #13
0
def get_best_triplets_by_searches(best_triplets, movie_name):
    """Pick the triplet whose quoted query yields the most search results.

    For every entry the query ``"<movie_name>" "<triplet>"`` is sent to
    ``se.get_query_total_results``; the triplet with the strictly highest
    count wins (the first one seen wins ties).

    :param best_triplets: sequence whose items carry the triplet text at index 0
    :param movie_name: movie title placed in the first quoted part of the query
    :return: ``(best_triplet, max_searches)``; ``("", 0)`` when no query
             returned more than zero results
    """
    top_count = 0
    top_triplet = ""
    for entry in best_triplets:
        candidate = entry[0]
        query = "\"" + movie_name + "\" \"" + candidate + "\""
        hits = se.get_query_total_results(query)
        if hits > top_count:
            top_count, top_triplet = hits, candidate
    return top_triplet, top_count
Example #14
0
def search():
    """Render one page of search results with previous/next page links.

    Query parameters read from the request: ``search_term``, ``site_name``,
    ``page_number`` (defaults to 1 when empty) and ``is_tag`` (whether the
    search term is a tag rather than a free-text search).
    """
    params = request.query
    search_term = params.search_term
    site_name = params.site_name
    page_number = int(params.page_number or '1', 10)
    is_tag = params.is_tag

    # The first page has no predecessor.
    prev_page = None if page_number == 1 else page_number - 1

    # Pointer to the search engine index of the requested site.
    index_pointer = index_pointers[site_name]

    search_results, is_last_page = SearchEngine.get_search_results(
        index_pointer, search_term, page_number, site_name, is_tag)

    # The last page has no successor.
    next_page = None if is_last_page else page_number + 1

    return template(
        'search_results',
        current_page_num=page_number,
        prev_page=prev_page,
        next_page=next_page,
        site_name=site_name,
        is_tag=is_tag,
        search_term=search_term,
        search_results=search_results,
    )
Example #15
0
def parse_search_table_row(row, quote_id):
    """Score every line of a quote by its number of search results.

    The quote (``row[1]``) is split into lines with ``quote_to_list``.
    Empty lines are skipped and processing stops at the first line matching
    ``INTERESTING_REG_PATTERN``.  Each remaining line is searched together
    with the movie name (``row[2]``), using the text after the first ":".

    :param row: table row; index 1 holds the quote, index 2 the movie name
    :param quote_id: identifier printed when a search query fails
    :return: ``(quote_str, max_line, max_searches)`` - the processed lines
             joined with newlines, the line with the most results, and
             that result count
    """
    raw_quote = row[1]
    movie_name = row[2]
    quote_lines = quote_to_list(raw_quote)

    kept_lines = []
    best_count = 0
    best_line = ""
    for line in quote_lines:
        if line == "":
            continue
        # A matching line marks the end of the quote.
        if INTERESTING_REG_PATTERN.match(line):
            break
        kept_lines.append(line + "\n")
        # Drop the character name, i.e. everything up to the first colon.
        text_start = line.find(":") + 1
        query = "\"" + movie_name + "\" \"" + line[text_start:] + "\""
        try:
            hits = se.get_query_total_results(query)
        except:
            print("the id we got stuck in is " + str(quote_id))
            raise
        if hits > best_count:
            best_count, best_line = hits, line

    return ("".join(kept_lines), best_line, best_count)
Example #16
0
def result():
    """Run a search and render one page of its results.

    ``page`` is read from the URL arguments (1-based, default 1).  For POST
    requests ``query`` and ``resultAmount`` come from the form, with
    ``resultAmount`` defaulting to 15; for other requests they come from
    the URL arguments, with defaults ``''`` and ``0``.

    :return: ``result.html`` rendered with ``valid=False`` when the engine
             returns an empty list, otherwise with the requested page.
    """
    # Convert the 1-based page number into a 0-based index.  Zero and
    # negative values are clamped to the first page; previously a negative
    # page produced a negative slice offset and returned items from the
    # end of the result list.
    page = max(request.args.get('page', 1, type=int), 1) - 1

    if request.method == 'POST':
        form = request.form
        query = form['query']
        if 'resultAmount' in form.keys():
            resultAmount = int(form['resultAmount'])
        else:
            resultAmount = 15
    else:
        query = request.args.get('query', '', type=str)
        resultAmount = request.args.get('resultAmount', 0, type=int)

    # search engine!
    search = SearchEngine.searchEngine(query, 200, indexMarkers, docIDMarkers,
                                       stopWords, stemmer)
    if search == []:
        return render_template("result.html", valid=False)

    elapsed = round(search[0], 2)
    infoList = search[1]
    totalResultAmount = search[2]
    # Never ask for more results per page than exist in total.
    if totalResultAmount < resultAmount:
        resultAmount = totalResultAmount

    # pagination (the template is given a 1-based page number again)
    offset = page * resultAmount
    infoList = infoList[offset:offset + resultAmount]
    page += 1

    return render_template("result.html",
                           valid=True,
                           page=page,
                           query=query,
                           resultAmount=resultAmount,
                           totalResultAmount=totalResultAmount,
                           infoList=infoList,
                           time=elapsed)
	def test_one_document_type(self):
		"""A query followed by ``-text`` parses to ``document_types == ['text']``."""
		argv = ['dummy query', '-text']
		parsed = iE.create_parser().parse_args(argv)
		self.assertEqual(parsed.document_types, ['text'])
Example #18
0
import string
Example #19
0
def _setup(text):
    """Return the search dialog for *text*'s root window.

    The ``SearchDialog`` is created on first use and cached on the search
    engine as ``_searchdialog``.

    :param text: widget providing ``_root()``
    :return: the cached (or newly created) dialog
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_searchdialog"):
        return engine._searchdialog
    engine._searchdialog = SearchDialog(root, engine)
    return engine._searchdialog
Example #20
0
orders = [0,1,2]
bmas = [["diamond","/home/felsamps/Tcc/cache-mvc/inDiamondTZ.txt"], ["square", "/home/felsamps/Tcc/cache-mvc/inSquareTZ.txt"]]
sets = [1, 2, 3, 4, 5, 6, 7, 8]
blocks = [[16,16], [20,20], [40,40], [32,24], [40, 30]]
caches = [[2,2], [3,3], [4,4], [5,5], [6,6], [7,7], [8,8], [9,9], [10,10], [3,4], [4,5], [5,6], [3,5]]

stats = []

if __name__ == "__main__":
	i = 1
	total = len(orders) * len(bmas) * len(sets) * len(blocks) * len(caches)
	for order in orders:
		for bma in bmas:
			for set in sets:
				for block in blocks:
					for cache in caches:
						config = ConfigFile("fake")
						config.initConfigs(w, h, block[0], block[1], set, cache[0]*cache[1], cache[0], cache[1], order, bma[0])
						trace = TraceFile(bma[1])
						print "Executando configuracao " , i, "de", total
						engine = SearchEngine(trace, config)
						engine.process()
						stats.append(engine.getStats())
						i += 1

	fp = open("/home/felsamps/Tcc/cache-mvc/results/results.csv","w")
	stats[0].printHeader(fp)
	for result in stats:
		# @type result Stats
		result.reportFile(fp)
Example #21
0
import string
Example #22
0
from flask import Flask, render_template, url_for, request
import SearchEngine

# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Executed once at import time.  The four returned values are passed to
# SearchEngine.searchEngine() by the /result view on every request.
indexMarkers, docIDMarkers, stopWords, stemmer = SearchEngine.preliminary()


@app.route('/')
def index():
    """Serve the landing page rendered from ``index.html``."""
    landing_page = render_template('index.html')
    return landing_page


@app.route('/result', methods=['POST', 'GET'])
def result():
    page = request.args.get('page', 1, type=int)
    if page > 0:
        page -= 1
    if request.method == 'POST':
        result = request.form
        query = result['query']
        if 'resultAmount' not in result.keys():
            resultAmount = 15
        else:
            resultAmount = int(result['resultAmount'])
    else:
        query = request.args.get('query', '', type=str)
        resultAmount = request.args.get('resultAmount', 0, type=int)
    # search engine!
    search = SearchEngine.searchEngine(query, 200, indexMarkers, docIDMarkers,
                                       stopWords, stemmer)
    if search == []:
Example #23
0
import string
Example #24
0
def test_steam_miss(test_list):
    """Measure how often a misspelled game name is still found by search.

    One random lowercase letter is drawn once.  For every game, the
    character at a random position is looked up and its first occurrence
    in the name is replaced by that letter; the altered name is searched
    and the game counts as found when its id appears among the first ten
    results.  The printed accuracy divides the hits by the fixed
    ``test_num`` of 200.

    :param test_list: mapping of game id to game name
    """
    test_num = 200
    hit_count = 0
    replacement = ''.join(random.sample('qwertyuiopasdfghjklzxcvbnm', 1))
    for game_id, game_name in test_list.items():
        position = random.randint(0, len(game_name) - 1)
        misspelled = game_name.replace(game_name[position], replacement, 1)
        results = get_search_results(
            "https://store.steampowered.com/search/?term=", misspelled)
        if results is None:
            continue
        # Only the first ten hits are considered.
        if len(results) > 10:
            results = results[:10]
        wanted = str(game_id)
        if any(str(game['game_id']) == wanted for game in results):
            hit_count += 1
    print('test_num:', test_num, 'accuracy', hit_count / test_num)


if __name__ == "__main__":
    # Script entry point: create the module-level objects, then start the app.
    CACHE_DICT = open_cache()
    PS = PorterStemmer()
    # NOTE(review): `config` and `word_tokenize` are defined outside this fragment.
    search_engine = SearchEngine(config, word_tokenize, PS, isStemming=False)
    # Accuracy check, currently commented out:
    # test_list = search_engine.test_auc_zh_steam()
    # test_steam_miss(test_list)

    # Start the application with debug mode enabled.
    app.run(debug=True)
	def test_no_document_types(self):
		"""Without a type flag, ``document_types`` parses to ``None``."""
		argv = ['dummy query']
		parsed = iE.create_parser().parse_args(argv)
		self.assertEqual(parsed.document_types, None)
Example #26
0
    parser.add_argument("--mode", choices=["train","eval","repr_code","search"], default='train',
                        help="The mode to run. The `train` mode trains a model;"
                        " the `eval` mode evaluate models in a test set "
                        " The `repr_code/repr_desc` mode computes vectors"
                        " for a code snippet or a natural language description with a trained model.")
    parser.add_argument("--gen", type=int, default='3', help="Number of GA generation")
    parser.add_argument("--chunk_size", type=int, default='20', help="Number of inputs")
    parser.add_argument("--mutation_rate", type=float, default='0.05', help="Mutation Rate")
    parser.add_argument("--verbose",action="store_true", default=True, help="Be verbose")
    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: parse the command line, look up the configuration
    # factory named "config_<model>" in `configs`, and create the engine.
    args = parse_args()
    config = getattr(configs, 'config_'+args.model)()
    engine = SearchEngine.SearchEngine(args, config)

    ##### Define model ######
    logger.info('Build Model')

    #tf.compat.v1.global_variables_initializer()
    # The model class is looked up by name in `models` and built from config.
    model = getattr(models, args.model)(config)  # initialize the model
    model.build()
    # Export the model summary into a per-model output directory.
    model.summary(export_path = "./output/{}/".format(args.model))
    
    # Optimizer comes from config['training_params']['optimizer'];
    # 'adam' is used when either key is missing.
    optimizer = config.get('training_params', dict()).get('optimizer', 'adam')
    model.compile(optimizer=optimizer)  

    # Dataset directory: <data_path><dataset>/
    data_path = args.data_path+args.dataset+'/'

import sys
from PySide2.QtCore import *
from PySide2.QtGui import *
from PySide2.QtWidgets import *
from vmmpy import *
sys.path.append("RemoteMemoryScanner")
from SearchEngine import *
from UserInterface import *

if __name__ == "__main__":
    # Script entry point: create the Qt application, wire the search engine
    # into the user interface, show the main window and run the event loop.
    app = QApplication(sys.argv)
    search_engine = SearchEngine()
    user_interface = UserInterface(search_engine)
    user_interface.main_window.show()
    # The event loop's return value becomes the process exit status.
    sys.exit(app.exec_())
Example #28
0
__author__="felsamps"
__date__ ="$16/07/2010 15:27:49$"

import sys

from Cache import *
from ConfigFile import *
from MMU import *
from TraceFile import *
from SearchEngine import *


if __name__ == "__main__":
	# Usage: <script> <config file> <trace file>
	# argv[1] is handed to ConfigFile, argv[2] to TraceFile.
	configFile = ConfigFile(sys.argv[1])
	configFile.parseFile()
	traceFile = TraceFile(sys.argv[2])
	# Run the search engine over the trace with the parsed configuration.
	me = SearchEngine(traceFile, configFile)
	me.process()
	def test_query(self):
		"""The positional argument is exposed unchanged as ``query``."""
		argv = ['dummy query']
		parsed = iE.create_parser().parse_args(argv)
		self.assertEqual(parsed.query, 'dummy query')
Example #30
0
from Tkinter import *
import pywikibot
import RevisionPuller as RP
import SearchEngine as SE
import PageProcessor as PP

# Module-level instances created at import time and shared by the
# functions defined below.
engine = SE.SearchEngine()
processor = PP.PageProcessor()


def get_readable_text_of_old_revision(page_title: str, rev_id: int):
    """
    Return the "readable" text of one revision of a page.

    The page is the first result of a "nearmatch" search for the title,
    limited to one result.

    :param page_title: A string of the page title
    :param rev_id: The revision number of the desired revision
    :return: A string of the revision's readable text
    """
    matches = engine.search(page_title, 1, "nearmatch")
    page = matches[0]
    raw_text = RP.get_text_of_old_revision(page, rev_id)
    return processor.getReadableText(raw_text)


def get_revisions(page_title: str,
                  recent_to_oldest: bool = True,
                  num_revisions=None,
                  start_time: pywikibot.Timestamp = None,
                  end_time: pywikibot.Timestamp = None):
    """
    Returns the last (num_revisions) revisions from a given Wikipedia page
    If all revisions are desired use: get_latest_revisions(page)
    :param page_title: A string containing the title of the desired page
    :param recent_to_oldest: Set to false if we want the revisions in order of oldest to most recent
    :param num_revisions: The number of revisions to be grabbed (set to an integer to set limit to number of revisions grabbed)
Example #32
0
from Tkinter import *
Example #33
0
class SearchGUI(wx.Frame):
    """Main window of the search front end.

    The frame holds a query box, two check boxes (query enrichment and
    SVD), four radio buttons selecting the similarity metric, and a search
    button.  Pressing the button runs the query through the search engine
    and hands the result to ``htmlPresenter``.
    """

    def __init__(self, *args, **kw):
        """Build the UI, set the default options and create the engine.

        All arguments are forwarded unchanged to ``wx.Frame``.
        """
        super(SearchGUI, self).__init__(*args, **kw)

        self.InitUI()

        # Flags indicating checkbox states.
        self.enrichFlag = False
        self.SVDFlag = False
        self.metric = COSINE

        # Always define the attribute: if construction fails it stays None,
        # which executeSearch checks for instead of hitting an
        # AttributeError on a missing attribute.
        self.searchEng = None
        try:
            self.searchEng = SearchEngine()
        except Exception:
            self.errorMessage("Error: Feature vectors have not been created!")

    def InitUI(self):
        '''
            Defining all the widgets.
        '''
        self.panel = wx.Panel(self)

        # Label indicating query.
        self.label = wx.StaticText(self.panel, label="What do you want to search for: ", size=(200, -1))

        # Text box for the query.
        self.queryBox = wx.TextCtrl(self.panel, size=(190, -1))

        self.checkPanel = wx.Panel(self.panel)

        # Check box for query enrich.
        self.enrichCheck = wx.CheckBox(self.checkPanel, label='Enrich Query', pos=(20, 20))
        self.enrichCheck.SetValue(False)
        self.enrichCheck.Bind(wx.EVT_CHECKBOX, self.changeEnrichFlag)

        # Check box for SVD.
        self.svdCheck = wx.CheckBox(self.checkPanel, label='SVD', pos=(20, 40))
        self.svdCheck.SetValue(False)
        self.svdCheck.Bind(wx.EVT_CHECKBOX, self.changeSVDFlag)

        # Radio buttons for similarity metrics.
        self.rbPanel = wx.Panel(self.panel)
        self.rb1 = wx.RadioButton(self.rbPanel, label='Cosine', pos=(10, 10), style=wx.RB_GROUP)
        self.rb2 = wx.RadioButton(self.rbPanel, label='Norm', pos=(10, 30))
        self.rb3 = wx.RadioButton(self.rbPanel, label='Chebyshev', pos=(10, 50))
        self.rb4 = wx.RadioButton(self.rbPanel, label='Correlation', pos=(10, 70))

        for radio in (self.rb1, self.rb2, self.rb3, self.rb4):
            radio.Bind(wx.EVT_RADIOBUTTON, self.SetVal)

        # Search button.
        self.searchButton = wx.Button(self.panel, label='Search', pos=(20, 30))
        self.searchButton.Bind(wx.EVT_BUTTON, self.executeSearch)

        # Managing layout.
        self.sizer = wx.GridBagSizer(3, 2)
        self.sizer.Add(self.label, (0, 0))
        self.sizer.Add(self.queryBox, (0, 1))
        self.sizer.Add(self.checkPanel, (1, 0))
        self.sizer.Add(self.rbPanel, (1, 1))
        self.sizer.Add(self.searchButton, (2, 1))

        # Use the sizers.
        self.panel.SetSizerAndFit(self.sizer)
        self.SetSize((420, 220))
        self.SetTitle('Search Web Service')
        self.Centre()
        self.Show(True)

    def executeSearch(self, e):
        '''
            Execute the query depending on the enrich and SVD flags.

            Shows an error dialog when the query is empty, when the search
            engine could not be created, or when the search itself raises.
        '''
        qString = self.queryBox.GetValue()

        if not qString:
            self.errorMessage("Your query is empty!")
            return

        if self.searchEng is None:
            # Engine construction failed in __init__; repeat that message
            # rather than surfacing an opaque attribute error.
            self.errorMessage("Error: Feature vectors have not been created!")
            return

        if self.enrichFlag:
            qString = enrich(qString)

        try:
            start = time.time()
            if self.SVDFlag:
                sortedFileDist = self.searchEng.svdSearch(qString, self.metric)
            else:
                sortedFileDist = self.searchEng.normalSearch(qString, self.metric)
            end = time.time()

            # The elapsed search time is passed along with the results.
            htmlPresenter(sortedFileDist, HTML_DIR, qString, end - start)
            self.successMessage()
        except Exception as err:
            self.errorMessage(str(err))

    def changeSVDFlag(self, e):
        """Mirror the SVD check box; the metric radio panel is disabled while SVD is on."""
        sender = e.GetEventObject()
        self.SVDFlag = sender.GetValue()
        if self.SVDFlag:
            self.rbPanel.Disable()
        else:
            self.rbPanel.Enable()

    def changeEnrichFlag(self, e):
        """Mirror the 'Enrich Query' check box state in ``enrichFlag``."""
        sender = e.GetEventObject()
        self.enrichFlag = sender.GetValue()

    def SetVal(self, e):
        """Set ``metric`` according to the selected radio button."""
        if self.rb1.GetValue():
            self.metric = COSINE
        elif self.rb2.GetValue():
            self.metric = NORM
        elif self.rb3.GetValue():
            self.metric = CHEBYSHEV
        else:
            self.metric = CORRELATION

    def errorMessage(self, msg):
        """Show *msg* in a modal error dialog."""
        dial = wx.MessageDialog(None, msg, 'Error', wx.OK | wx.ICON_ERROR)
        try:
            dial.ShowModal()
        finally:
            # Modal dialogs are not released automatically after ShowModal.
            dial.Destroy()

    def successMessage(self):
        """Show a modal dialog saying that the results are generated."""
        dial = wx.MessageDialog(None, 'Results are generated', 'Info', wx.OK)
        try:
            dial.ShowModal()
        finally:
            # Modal dialogs are not released automatically after ShowModal.
            dial.Destroy()
Example #34
0
# A simple search test page
import cgi,os
import SearchEngine  #导入搜索引擎模块
import NeuralNetwork #导入神经网络模块

print "Content-type:text/html"
print

print """ <!DOCTYPE>
<HTML>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>A easy search engine</title>
</head>
<body>"""

form=cgi.FieldStorage()
q=cgi.escape(form["query"].value)
if form.has_key("query"):
	#爬去网页,建立索引
	crawler=SearchEngine.Crawler("db_search.db")
	#crawler.create_index_tables()  #运行一次即可
	crawler.make_stopwords(stopwords_file="stopwords.txt")
	pages=["http://book.douban.com/"]                 #预先准备好的url
	crawler.crawl(pages,depth=1)
	crawler.cal_pagerank(iterations=15)
	searcher=SearchEngine.Searcher("db_search.db")
	mynet=NeuralNetwork.SearchNet("db_network.db")
	#mynet.make_tables()   #运行一次即可
	mynet.train_query(searcher.query(q))
Example #35
0
def _setup(text):
    """Fetch the search dialog tied to the root window of *text*.

    A single ``SearchDialog`` per search engine is kept in the attribute
    ``_searchdialog``; it is built the first time it is requested.

    :param text: widget providing ``_root()``
    :return: the cached (or newly created) dialog
    """
    root = text._root()
    engine = SearchEngine.get(root)
    if hasattr(engine, "_searchdialog"):
        return engine._searchdialog
    engine._searchdialog = SearchDialog(root, engine)
    return engine._searchdialog
Example #36
0
    # use default values.
    # Take host and port from the command line when both are given;
    # otherwise fall back to localhost:8000.
    try:
        ip = sys.argv[1]
        # NOTE(review): the port stays a string here but is the int 8000 in
        # the fallback below - confirm that run() accepts both.
        port = sys.argv[2]
        print "Starting in production Mode: {0}, {1}.".format(ip, port)
    except IndexError:
        print "Starting in development Mode: localhost, 8000."
        ip = 'localhost'
        port = 8000

    # The metadata_shelve holds information about available sites and their sizes,
    # and the tags for each site, and their sizes.
    # {site_name: (sites metadata, tags metadata)}
    metadata_shelve = shelve.open('../Metadata/metadata.db', protocol = -1)

    # s_e_sites: list of (site_name, site metadata) pairs.
    # tags_dict: site_name -> tags metadata.
    s_e_sites = []
    tags_dict = {}
    site_names = metadata_shelve.keys()

    for site_name in site_names:
        s_e_sites.append((site_name, metadata_shelve[site_name][0]))
        tags_dict.update({site_name: metadata_shelve[site_name][1]})

    # Everything needed has been copied out of the shelve by now.
    metadata_shelve.close()

    #Scan the /Index directory for all the available search engine indexes.
    index_pointers = SearchEngine.get_all_index_pointers('../Index', site_names)
    
    #Run the webserver
    run(host = ip, port = port, debug = True)