def content_based_calculator(self, stringofwords):
    # Split the query on non-word characters and lowercase each term
    divider = re.compile('\\W*')
    res = [x.lower() for x in divider.split(stringofwords) if x != '']
    content_scores = {}
    paperid_index = {}
    content_out = {}
    for word in res:
        if word not in self.wordlocations:
            continue
        for papid in self.wordlocations[word]:
            # Score a paper by how often this term appears in it
            papid_score = len(self.wordlocations[word][papid])
            if papid not in content_scores:
                content_scores[papid] = papid_score
                paperid_index[papid] = 1
            else:
                # Multiply per-term frequencies together
                content_scores[papid] = content_scores[papid] * papid_score
                paperid_index[papid] += 1
    # Keep only papers that matched every query term (AND semantics)
    for pid in paperid_index:
        if paperid_index[pid] == len(res):
            content_out[pid] = content_scores[pid]
    inst = searchengine.searcher('database')
    content_out = inst.normalizescores(content_out)
    self.contentscore = content_out
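# A rough usage sketch (hypothetical driver code; a populated
# self.wordlocations index on the surrounding class is assumed):
#
#   ranker.content_based_calculator('neural network memory')
#   best = sorted(ranker.contentscore.items(),
#                 key=lambda kv: kv[1], reverse=True)[:10]
#
# Only papers containing every query term survive the filter, and
# normalizescores rescales the surviving products into the 0..1 range so
# they can be combined with other ranking signals.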
def generateFig(filePath):
    e = searchengine.searcher('searchindex.db')
    frequencies = e.getFrequentWords()
    # take relative word frequencies into account, lower max_font_size
    # wordcloud = WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
    # Note: set.add() returns None, so the stopword must be added first
    # rather than inline in the constructor call
    STOPWORDS.add(u"黄豆")
    wordcloud = WordCloud(font_path='/home/jamin/Documents/resource/msyh.ttf',
                          background_color="white",
                          stopwords=STOPWORDS,
                          max_font_size=40,
                          relative_scaling=.25).fit_words(frequencies)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.savefig(filePath)
def makeindex(key):
    e = s.searcher('searchindex.db')
    result = e.query(key)
    urls = []
    for row in result:
        for j in row:
            urls.append(e.geturlname(j))
    return urls
def pageRank():
    reload(searchengine)
    crawler = searchengine.crawler('searchindex.db')
    e = searchengine.searcher('searchindex.db')
    # crawler.calculatepagerank()
    cur = crawler.con.execute('select * from pagerank order by score desc')
    for i in range(3):
        d = cur.next()
        print d, e.geturlname(d[0])
def queryhandler():
    e = searchengine.searcher()
    q = bottle.request.forms.get("query")
    mywords, myurls = e.query(q)
    s = bottle.request.environ.get('beaker.session')
    s['mywords'] = mywords
    s['myurls'] = myurls
    s.save()
    bottle.redirect('/results')
def test_calculate_pagerank():
    sys.stderr.write("testing pagerank calculation...\n")
    crawler = searchengine.crawler('searchindex.db')
    crawler.calculatepagerank()
    sys.stderr.write("checking pagerank result...\n")
    cur = crawler.con.execute('select * from pagerank order by score desc')
    for i in range(3):
        print cur.next()
    sys.stderr.write("checking pagerank top url...\n")
    e = searchengine.searcher('searchindex.db')
    # re-run the query: the cursor above has already consumed the top rows
    cur = crawler.con.execute('select * from pagerank order by score desc')
    urlid = cur.next()[0]
    print e.geturlname(urlid)
def testQueryIndian(self):
    wordids = []
    rows = []
    if self.config.queries is None or len(self.config.queries) <= 0:
        queries = ['memory', 'mental', 'mind', 'storage', 'magnetic',
                   'cache', 'psychological', 'semiconductor', 'transistor',
                   'random access', 'data storage']
    else:
        queries = self.config.queries
    s = searcher(self.dbname)
    if self.numusers >= 1:
        for q in queries:
            for userid in [x + 1 for x in range(self.numusers)]:
                wordids, rows = s.query(q, userid)
def firesearch():
    outputwidget.delete(1.0, END)
    fillconfig()
    s = searcher(config.dbname)
    q = queryvar.get()
    urllist = []
    try:
        widlist, urlidlist = s.query(q, config.userid,
                                     config.userurlhitscoresweight)
        for urlid in urlidlist:
            url = s.geturlname(urlid)
            urllist.append(url)
        outputwidget.insert(END, '\n'.join(urllist))
    except:
        print "Error:", sys.exc_info()
        tkMessageBox.showerror("Input Error", sys.exc_info())
        raise
def serve_search(environ, start_response):
    query_words = ''
    results = ''
    if 'QUERY_STRING' in environ:
        query_dict = cgi.parse_qs(environ['QUERY_STRING'])
        if 'q' in query_dict:
            # parse_qs returns a list for values, as query parameters can
            # appear several times (e.g. 'q=ddsview&q=makeicns'). Ignore all
            # but the first occurrence of q.
            query_words = query_dict['q'][0]
            s = searchengine.searcher('searchindex.db')
            results = '<br>\n'.join(['%f: <a href="%s">%s</a>' % (score, url, url)
                                     for score, url in s.query(query_words)])
            results = results.encode('utf-8')
    # Note: this also returns html for favicon queries.
    start_response('200 OK', [('Content-type', 'text/html')])
    return [template % locals()]
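# A minimal way to serve this WSGI handler locally (a sketch: the host, port,
# and the module-level 'template' string are assumptions, not shown above):
#
#   from wsgiref.simple_server import make_server
#   make_server('localhost', 8080, serve_search).serve_forever()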
def pagerank_calculator(self, iterations=20):
    pageranks = {}
    for item in self.citations:
        pageranks.setdefault(item, 1.0)
    for i in range(iterations):
        # print 'Iteration %d' % i
        pr = 0.15
        for item in pageranks:
            init_score = 0
            for element in self.citations[item]:
                # Papers outside the index contribute a default score of 1.0
                if element not in pageranks:
                    val = 1.0
                else:
                    val = pageranks[element]
                linknum = self.citationcounts[element]
                init_score += float(val) / linknum
            pageranks[item] = pr + (0.85 * init_score)
    inst = searchengine.searcher('database')
    pageranks = inst.normalizescores(pageranks)
    # print pageranks['9402117']
    self.pagerankscore = pageranks
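# The loop above is the standard damped PageRank update (assuming
# self.citations maps each paper to the papers that cite it): for a paper p,
#
#     PR(p) = 0.15 + 0.85 * sum_q PR(q) / C(q)
#
# summed over each citing paper q, where C(q) is q's total citation count from
# self.citationcounts. With damping factor 0.85 the iteration converges from
# any starting assignment; 20 rounds is a conventional cutoff, not a tuned one.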
def generatePosNegFile(filepath):
    f = open(filepath, 'w')
    e = searchengine.searcher('searchindex.db')
    # Three most positive urls first, then the three most negative
    for order, label in (('desc', 'pos'), ('asc', 'neg')):
        cursor = e.con.execute(
            'select * from urllist where posnegscore is not null '
            'order by posnegscore %s limit 3' % order)
        for row in cursor:
            f.write('%s\t%s\t%s\n' % (row[0], row[1], label))
    f.close()
#!/usr/bin/env python
# coding: utf-8
__author__ = 'dick'

import searchengine

craw = searchengine.crawler('searchindex.db')
# craw.createindextables()
pages = [
    # 'http://www.bbc.com/',
    'https://www.hao123.com/?1477704964',
    # 'https://www.baidu.com',
]
# craw.crawl(pages)
e = searchengine.searcher('searchindex.db')
print e.getmatchrows('hao weather yes')
'''
Created on Feb 16, 2014

@author: ssashita

A query is given as

    python runquery.py 1 functional programming

where 1 is the userid and the rest are the query words.
'''
from searchengine import searcher
import sys
import cconfigurator

if __name__ == '__main__':
    config = cconfigurator.configure('crawled.db')
    listargs = []
    if len(sys.argv) > 2:
        for arg in sys.argv[2:]:
            listargs.append(arg)
        s = searcher('crawled.db')
        s.query(' '.join([str(x) for x in listargs]), sys.argv[1])
    else:
        print("At least 3 args required. Second one is the userid and rest are the query words")
def query():
    e = searchengine.searcher('searchIndex.db')
    print e.query('functional programming...')
def testquery(q='functional programming'):
    search = searchengine.searcher()
    search.query(q)
def test_se_search():
    searcher = se.searcher('crawler.db')
    result = searcher.query('python language blog')
    print result
def test_query_ranking(weightFunc):
    sys.stderr.write("testing query with weighting function '%s'...\n" % weightFunc)
    e = searchengine.searcher('searchindex.db')
    print e.query('programming', weightFunc)
from flask import Flask, render_template, request, redirect
import searchengine, neuralnet, crawler

searcher = searchengine.searcher('searchengine.db')
crawler = crawler.crawler('searchengine.db')
nnet = neuralnet.searchnet('nn.db')
app = Flask(__name__)


@app.route("/")
def search():
    if request.args:
        queryText = request.args.get('q')
        (wordids, scores, urlIdsList, urlsList) = searcher.query(queryText)
        if len(urlIdsList) != 0:
            listOfItems = [{'id': urlIdsList[i],
                            'url': urlsList[i],
                            'score': scores[i]}
                           for i in range(len(urlIdsList))]
        else:
            listOfItems = []
        return render_template('index.html', list=listOfItems, q=queryText)
    return render_template('index.html', list=None)


@app.route('/train', methods=['POST', 'GET'])
def train():
    if request.method == 'POST':
        queryPhrase = request.json['q']
        selectedURLId = int(request.json['clicked'])
        app.logger.debug('queryPhrase: %s => selectedURLId: %s'
                         % (queryPhrase, selectedURLId))
        (wordids, scores, urlIdsList, urlsList) = searcher.query(queryPhrase)
        nnet.trainquery(wordids, urlIdsList, selectedURLId)
    # a Flask view must return a response object or string
    return 'OK'
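# A minimal entry point for local testing (a sketch; the host, port, and
# debug settings are assumptions, not part of the original app):
if __name__ == '__main__':
    app.run(debug=True)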
def test_full_match_words():
    s = searcher("output/search.db")
    print s.getfullmatchrows("simple web page")
def wordFrequency():
    reload(searchengine)
    e = searchengine.searcher('searchindex.db')
    e.query('sqlite3 python')
from django.conf.urls.static import static
from django.shortcuts import render
from django.http import HttpResponse, HttpRequest
from django.shortcuts import render_to_response
from django.template import RequestContext
import searchengine
import nn

e = searchengine.searcher('wikipedia.db')
allurls = e.getallurls("functional")


# Create your views here.
def home_view(request):
    return HttpResponse(request.method)


def search_string(request):
    query = request.GET['searchquery']
    data = e.query(query)
    context_dict = {'results': data, 'query': query}
    return render_to_response('results_page.html', context_dict)


def train_nn(request, page_alias, selected_result):
    network = nn.searchnet('nn.db')
    words = e.getwordids(page_alias)
    if selected_result.endswith("/"):
        selected_result = selected_result[:-1]
    urlid = e.geturlid(selected_result)
def setUp(self):
    self.s = searchengine.searcher("test.db")
import searchengine

pages = ['https://news.google.com.tw/']
crawler = searchengine.crawler('test')
crawler.createindextables()  # create tables
crawler.crawl(pages)
crawler.calculatepagerank()
e = searchengine.searcher('test')
e.query('單場 球季')  # zh-TW query terms: "single game", "season"
def contentranking():
    reload(searchengine)
    # mynet = nn.searchnet('nn.db')
    # mynet.maketables()
    e = searchengine.searcher('searchindex.db')
    e.query('sqlite3 python')
# -*- coding: utf-8 -*-
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application, url, StaticFileHandler
import os.path
import sys
sys.path.insert(0, os.path.abspath("../collective-intelligence"))
import searchengine as se

searcher = se.searcher("index.db")
foofle_data = {"query": "", "results": []}


def update_data(query):
    foofle_data["query"] = query
    foofle_data["results"] = searcher.query(query)


class MainHandler(RequestHandler):
    def initialize(self, data):
        self.data = data

    def get(self):
        self.render("index.html",
                    query=self.data["query"],
                    results=self.data["results"])

    def post(self):
        query = self.get_argument("input-query")
        print "The search will use the string '%s' as the query" % query
        update_data(query)
        self.get()
def documentLocation():
    reload(searchengine)
    e = searchengine.searcher('searchindex.db')
    e.query('sqlite3 python')
def test_getmatchrows():
    sys.stderr.write("testing get match rows...\n")
    e = searchengine.searcher('searchindex.db')
    print e.getmatchrows('programming')
# -*- coding: utf-8 -*-
import searchengine

e = searchengine.searcher('searchindex.db')
# print e.getmatchrows('perl python functional')
while 1:
    print "Enter the query words (en)"
    q = raw_input()
    print e.query(q)
def test_query():
    sys.stderr.write("testing query...\n")
    e = searchengine.searcher('searchindex.db')
    print e.query('programming')
import ast

import searchengine as se

# eval_tests and MyVisitor are defined elsewhere in the original module
for test in eval_tests:
    node = ast.parse(test)
    print ast.dump(node)
    # MyVisitor().visit(node)
    print '\n'

if __name__ == '__main__':
    '''
    2. Boolean operations. Many search engines support Boolean queries, which
    allow users to construct searches like "python OR perl." An OR search can
    work by doing the queries separately and combining the results, but what
    about "python AND (program OR code)"? Modify the query methods to support
    some basic Boolean operations.

    3. Exact matches. Search engines often support "exact match" queries,
    where the words in the page must match the words in the query in the same
    order with no additional words in between. Create a new version of getrows
    that only returns results that are exact matches. (Hint: you can use
    subtraction in SQL to get the difference between the word locations.)
    '''
    dbname = 'searchindex.db'
    if True:
        crawler = se.crawler(dbname)
        crawler.createindextables()
        pages = ['https://www.zhihu.com/', 'https://github.com/']
        crawler.crawl(pages, depth=2)
        crawler.calculatepagerank()
    else:
        searcher = se.searcher(dbname)
        q = 'zhihu career'
        print searcher.query(q)
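# A sketch of exercise 3 (exact matches), assuming the book's wordlocation
# schema (urlid, wordid, location); the function and parameter names here are
# illustrative, not taken from the snippets above:
def getexactmatchrows(con, wordids):
    # Join one wordlocation alias per query word and require consecutive
    # locations: w1 must sit exactly one position after w0, and so on, which
    # is the "subtraction in SQL" the hint refers to.
    tables = ','.join(['wordlocation w%d' % i for i in range(len(wordids))])
    clauses = ['w%d.wordid=%d' % (i, wid) for i, wid in enumerate(wordids)]
    clauses += ['w%d.urlid=w0.urlid' % i for i in range(1, len(wordids))]
    clauses += ['w%d.location-w%d.location=1' % (i, i - 1)
                for i in range(1, len(wordids))]
    sql = 'select w0.urlid, w0.location from %s where %s' % (
        tables, ' and '.join(clauses))
    return con.execute(sql).fetchall()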
#!/usr/bin/python
# coding: UTF-8
# Author: David
# Email: [email protected]
# Created: 2016-08-01 14:08
# Last modified: 2016-08-01 15:54
# Filename: search_test.py
# Description:
import searchengine

e = searchengine.searcher()
e.query('form authentication')