Example No. 1
def nn2():
	import nn

	wWorld,wRiver,wBank = 101,102,103
	uWorldBank,uRiver,uEarth = 201,202,203
	mynet = nn.searchnet("nn.db")
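	# getresult runs a feedforward pass and returns one relevance score per URL in the second list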
	print(mynet.getresult([wWorld,wBank],[uWorldBank,uRiver,uEarth]))
Example No. 2
 def __init__(self):
     self.net = nn.searchnet('gridwar.db')
     # self.net.maketables()
     self.oUP = 0
     self.oDown = 1
     self.oLeft = 2
     self.oRight = 3
     self.outputs = [self.oUP, self.oDown, self.oLeft, self.oRight]
Example No. 3
File: run.py Project: wz125/courses
def onclick():
  mynet=nn.searchnet('nn.db')
  wWorld,wRiver,wBank =101,102,103
  uWorldBank,uRiver,uEarth =201,202,203
  mynet.generatehiddennode([wWorld,wBank],[uWorldBank,uRiver,uEarth])
  for c in mynet.con.execute('select * from wordhidden'): print c
  for c in mynet.con.execute('select * from hiddenurl'): print c
  print mynet.getresult([wWorld,wBank],[uWorldBank,uRiver,uEarth])
Example No. 5
def train_nn(request, page_alias, selected_result):
    # 'e' (a searcher instance) and 'allurls' are assumed to be module-level globals in the source project
    network = nn.searchnet('nn.db')
    words = e.getwordids(page_alias)
    if selected_result.endswith("/"):
        selected_result = selected_result[:-1]
    urlid = e.geturlid(selected_result)
    network.trainquery(words, allurls, urlid)
    context_dict = {'link': selected_result}
    return render_to_response('redirect_page.html', context_dict)
def test_select():
    sys.stderr.write("testing create hiddennodes...\n")
    mynet=nn.searchnet('nn.db')
    mynet.maketables()
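    # maketables creates the hiddennode, wordhidden and hiddenurl tables queried below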
    wWorld,wRiver,wBank =101,102,103
    uWorldBank,uRiver,uEarth =201,202,203
    mynet.generatehiddennode([wWorld,wBank],[uWorldBank,uRiver,uEarth])
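    # generatehiddennode creates a hidden node for this word combination (if not already present)
    # and links it to each input word and each URL with default strengths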
    sys.stderr.write("testing 'select * from wordhidden'...\n")
    for c in mynet.con.execute('select * from wordhidden'): print c
    sys.stderr.write("testing 'select * from hiddenurl'...\n")
    for c in mynet.con.execute('select * from hiddenurl'): print c
def test_trainqueries():
    sys.stderr.write("testing training queries...\n")
    mynet=nn.searchnet('nn.db')
    # word/URL ids (wWorld, wBank, uWorldBank, ...) are assumed to be module-level globals here
    allurls=[uWorldBank,uRiver,uEarth]
    for i in range(30):
        mynet.trainquery([wWorld,wBank],allurls,uWorldBank)
        mynet.trainquery([wRiver,wBank],allurls,uRiver)
        mynet.trainquery([wWorld],allurls,uEarth)
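    # after these training rounds, each query should score its selected URL highest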

    print mynet.getresult([wWorld,wBank],allurls)
    print mynet.getresult([wRiver,wBank],allurls)
    print mynet.getresult([wBank],allurls)
Example No. 8
def result():
    if request.method == 'POST':
        a = nn.searchnet('Truth.db')
        # this project appears to use a modified searchnet whose getresult takes raw text (see Example No. 34)
        res = a.getresult(request.form['statement'])
        if res[0] > res[1]:
            return render_template('detection.html', truthOrLie='Lie') 
        else:
            print(res)
            return render_template('detection.html', truthOrLie='Truth')
    
    else:
        return render_template("detect.html")
Example No. 9
def nn():
	import nn
	mynet = nn.searchnet("nn.db")
	mynet.maketables()
	wWorld,wRiver,wBank = 101,102,103
	uWorldBank,uRiver,uEarth = 201,202,203
	mynet.generatehiddennode([wWorld,wBank],[uWorldBank,uRiver,uEarth])

	for c in mynet.con.execute("select * from wordhidden"):
		print(c)

	for c in mynet.con.execute("select * from hiddenurl"):
		print(c)
Example No. 10
File: run.py Project: wz125/courses
def trainingTest():
  mynet=nn.searchnet('nn.db')
  wWorld,wRiver,wBank =101,102,103
  uWorldBank,uRiver,uEarth =201,202,203
  allurls=[uWorldBank,uRiver,uEarth]
  for i in range(30):
    mynet.trainquery([wWorld,wBank],allurls,uWorldBank)
    mynet.trainquery([wRiver,wBank],allurls,uRiver)
    mynet.trainquery([wWorld],allurls,uEarth)
  print mynet.getresult([wWorld,wBank],allurls)
  print mynet.getresult([wRiver,wBank],allurls)
  print mynet.getresult([wBank],allurls)
def test_nn():
    online, pharmacy = 1, 2
    spam, notspam = 1, 2
    possible = [spam, notspam]
    neuralnet = nn.searchnet('nntest.db')
    neuralnet.maketables()
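    # teach the net that 'online' and 'pharmacy' together indicate spam, while either word alone does not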
    neuralnet.trainquery([online], possible, notspam)
    neuralnet.trainquery([online, pharmacy], possible, spam)
    neuralnet.trainquery([pharmacy], possible, notspam)
    neuralnet.getresult([online, pharmacy], possible)
    neuralnet.getresult([online], possible)
    neuralnet.trainquery([online], possible, notspam)
    neuralnet.getresult([online], possible)
    neuralnet.trainquery([online], possible, notspam)
    neuralnet.getresult([online], possible)
    quit()
Example No. 12
    def getscoredlist(self, rows, wordids):
        # totalscores: maps document (url) id -> accumulated score
        totalscores = dict([(row[0], 0) for row in rows])

        mynet = nn.searchnet('nn.db')

        # several scoring methods, combined as a weighted sum
        weights = [(1.0, self.frequencyscore(rows)),
                   (1.0, self.locationscore(rows)),
                   (1.0, self.distancescore(rows)),
                   (1.0, self.inboundlinkscore(rows)),
                   (1.0, self.pagerankscore(rows)),
                   (1.0, self.linktextscore(rows, wordids)),
                   (2, self.nnscore(rows, wordids, mynet))]
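        # note that the neural-network score carries double weight (2) relative to the other metrics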

        for (weight, scores) in weights:
            for url in totalscores:
                totalscores[url] += weight * scores[url]

        return totalscores
Example No. 13
def train_nn(train_path, test_path):
    ham = 0
    spam = 1

    allans = [ham, spam]

    words = {}

    spamnet = nn.searchnet('spam.db')
    spamnet.maketables()

    for filename in glob.glob(train_path):
        with open(filename, 'r') as f:
            f = f.read()
            for word in nn.getwords(f):
                if word not in words:
                    words[word] = len(words) + 2

    cnt = 1
    for filename in glob.glob(train_path):
        print cnt
        cnt = cnt + 1
        with open(filename, 'r') as f:
            f = f.read()
            features = nn.getwords(f)
            wordNum = [words[word] for word in features]
            spamnet.generatehiddennode(wordNum, allans)
            label = filename.split('.')[3]
            if label == 'ham':
                label = 0
            else:
                label = 1
            spamnet.trainquery(wordNum, allans, label)

    print "Train Done!"
def test_trainquery():
    sys.stderr.write("testing training query...\n")
    mynet=nn.searchnet('nn.db')
    mynet.trainquery([wWorld,wBank],[uWorldBank,uRiver,uEarth],uWorldBank)
    print mynet.getresult([wWorld,wBank],[uWorldBank,uRiver,uEarth])
def test_feedforward():
    sys.stderr.write("testing feedforward (without training)...\n")
    mynet=nn.searchnet('nn.db')
    print mynet.getresult([wWorld,wBank],[uWorldBank,uRiver,uEarth])
from urllib.request import urlopen
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import sqlite3 as sql
from sqlite3 import Error
import re

import nn
mynet = nn.searchnet('nn.db')  # Connect to neural network database

# Create a list of words to ignore
ignorewords = set(['the', 'of', 'to', 'and', 'a', 'in', 'is', 'it'])

# DB connection function


def connect(db_file):

    try:
        conn = sql.connect(db_file)
        return conn
    except Error as e:
        print(e)

    return None


class crawler:
    # Initialize the crawler with the name of database
    def __init__(self, dbname):
        self.con = sql.connect(dbname)
Example No. 17
File: run.py Project: wz125/courses
def backpropagation():
  mynet=nn.searchnet('nn.db')
  wWorld,wRiver,wBank =101,102,103
  uWorldBank,uRiver,uEarth =201,202,203
  mynet.trainquery([wWorld,wBank],[uWorldBank,uRiver,uEarth],uWorldBank)
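  # trainquery runs a feedforward pass, then backpropagates with uWorldBank as the target output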
  print mynet.getresult([wWorld,wBank],[uWorldBank,uRiver,uEarth])
Example No. 18
# coding: UTF-8
# Author: David
# Email: [email protected]
# Created: 2016-07-30 15:25
# Last modified: 2016-08-02 16:06
# Filename: searchengine.py
# Description:
import urllib2
from BeautifulSoup import *
from urlparse import urljoin
import MySQLdb
import re
import nn

ignore_words = set(['the', 'of', 'to', 'and', 'a', 'in', 'is', 'it'])
mynet = nn.searchnet()


class crawler:
    def __init__(self,
                 db='PCI',
                 host='localhost',
                 user='******',
                 passwd='root',
                 port=3306):
        self.con = MySQLdb.connect(host=host,
                                   user=user,
                                   passwd=passwd,
                                   port=port,
                                   db=db)
        self.cur = self.con.cursor()
Example No. 19
File: example.py Project: zydxt/PCI
get_ipython().magic(u'logstart example.py append')
import nn
online, pharmacy = 1, 2
spam, notspam = 1, 2
possible = [spam, notspam]
neuralnet = nn.searchnet('nntest.db')
neuralnet.maketables()
neuralnet.trainquery([online], possible, notspam)
neuralnet.trainquery([online, pharmacy], possible, spam)
neuralnet.trainquery([pharmacy], possible, notspam)
neuralnet.getresult([online, pharmacy], possible)
neuralnet.getresult([online], possible)
neuralnet.trainquery([online], possible, notspam)
neuralnet.getresult([online], possible)
neuralnet.trainquery([online], possible, notspam)
neuralnet.getresult([online], possible)
quit()
Example No. 20
import searchengine as s
import nn
from flask import Flask
from flask import render_template, request
import os

mynet = nn.searchnet("nn.db")
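# module-level network shared by all requests; nn.db stores the connection strengths between words, hidden nodes and URLs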

template_dir = os.path.join(os.getcwd(), 'templates')
app = Flask(__name__, template_folder=template_dir)


def makeindex(key):
    e = s.searcher('searchindex.db')
    result = e.query(key)
    List = []
    size = len(result)
    for i in range(size):
        for j in result[i]:
            List.append(e.geturlname(j))
    return List


@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        keyword = request.form['keyword']
        res_list = makeindex(keyword)
        if keyword:
            return render_template('search.html',
                                   query=res_list,
Example No. 21
class searcher:
    def __init__(self, dbname):
        self.conn = sqlite.connect(dbname)

    def __del__(self):
        self.conn.close()

    def getmatchrows(self, q):
        # Strings to build the query
        fieldlist = 'w0.urlid'
        tablelist = ''
        clauselist = ''
        wordids = []
        # Split the words by spaces
        words = q.split(' ')
        tablenumber = 0
        for word in words:
            # Get the word ID
            wordrow = self.conn.execute(
                "select rowid from wordlist where word='%s'" %
                word).fetchone()
            if wordrow is not None:
                wordid = wordrow[0]
                wordids.append(wordid)
                if tablenumber > 0:
                    tablelist += ','
                    clauselist += ' and '
                    clauselist += 'w%d.urlid=w%d.urlid and ' % (tablenumber -
                                                                1, tablenumber)
                fieldlist += ',w%d.location' % tablenumber
                tablelist += 'wordlocation w%d' % tablenumber
                clauselist += 'w%d.wordid=%d' % (tablenumber, wordid)
                tablenumber += 1
        if (len(clauselist.strip()) == 0): return None
        # Create the query from the separate parts
        fullquery = 'select %s from %s where %s' % (fieldlist, tablelist,
                                                    clauselist)
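        # e.g. for a two-word query this builds: select w0.urlid,w0.location,w1.location
        #   from wordlocation w0,wordlocation w1 where w0.wordid=<id0> and w0.urlid=w1.urlid and w1.wordid=<id1>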
        print(fullquery)
        cur = self.conn.execute(fullquery)
        rows = [row for row in cur]
        return rows, wordids

    def getscoredlist(self, rows, wordids):
        # row format: [urlid, loc1, loc2, ...]
        totalscores = dict([(row[0], 0) for row in rows])
        # This is where you'll later put the scoring functions
        weights = []
        weights = [(1.0, self.frequencyscore(rows)),
                   (1.5, self.locationscore(rows))]
        for (weight, scores) in weights:
            for url in totalscores:
                totalscores[url] += weight * scores[url]
        return totalscores

    def geturlname(self, id):
        return self.conn.execute("select url from urllist where rowid=%d" %
                                 id).fetchone()[0]

    def query(self, q):
        rows, wordids = self.getmatchrows(q)
        scores = self.getscoredlist(rows, wordids)
        # sorted([(s,url),(s,url),...],reverse=1)
        rankedscores = sorted([(score, url)
                               for (url, score) in scores.items()],
                              reverse=1)
        for (score, urlid) in rankedscores[0:10]:
            print '%f\t%s' % (score, self.geturlname(urlid))
        # add after nn
        return wordids, [r[1] for r in rankedscores[0:10]]

    def normalizescores(self, scores, smallIsBetter=0):
        vsmall = 0.00001  # Avoid division by zero errors
        if smallIsBetter:
            minscore = min(scores.values())
            return dict([(u,float(minscore)/max(vsmall,l)) for (u,l) \
                in scores.items()])
        else:
            maxscore = max(scores.values())
            if maxscore == 0: maxscore = vsmall
            return dict([(u, float(c) / maxscore)
                         for (u, c) in scores.items()])

    def frequencyscore(self, rows):
        # init, (urlid, freq)
        counts = dict([(row[0], 0) for row in rows])
        for row in rows:
            counts[row[0]] += 1
        return self.normalizescores(counts)

    def locationscore(self, rows):
        locations = dict([(row[0], 1000000) for row in rows])
        for row in rows:
            loc = sum(row[1:])
            if loc < locations[row[0]]: locations[row[0]] = loc
        return self.normalizescores(locations, smallIsBetter=1)

    def distancescore(self, rows):
        # If there's only one word, everyone wins!
        if len(rows[0]) <= 2: return dict([(row[0], 1.0) for row in rows])
        # Initialize the dictionary with large values
        mindistance = dict([(row[0], 1000000) for row in rows])
        for row in rows:
            dist = sum([abs(row[i] - row[i - 1]) for i in range(2, len(row))])
            if dist < mindistance[row[0]]: mindistance[row[0]] = dist
        return self.normalizescores(mindistance, smallIsBetter=1)

    def inboundlinkscore(self, rows):
        uniqueurls = set([row[0] for row in rows])
        inboundcount=dict([(u,self.conn.execute( \
            'select count(*) from link where toid=%d' % u).fetchone()[0]) for u in uniqueurls])
        return self.normalizescores(inboundcount)

    def calculatepagerank(self, iterations=20):
        # clear out the current PageRank tables
        self.conn.execute('drop table if exists pagerank')
        self.conn.execute('create table pagerank(urlid primary key,score)')
        # initialize every url with a PageRank of 1
        self.conn.execute(
            'insert into pagerank select rowid, 1.0 from urllist')
        self.conn.commit()  # searcher defines no dbcommit(); commit on the connection directly
        for i in range(iterations):
            print "Iteration %d" % (i)
            for (urlid, ) in self.conn.execute('select rowid from urllist'):
                pr = 0.15
                # Loop through all the pages that link to this one
                for (linker, ) in self.conn.execute(
                        'select distinct fromid from link where toid=%d' %
                        urlid):
                    # Get the PageRank of the linker
                    linkingpr = self.conn.execute(
                        'select score from pagerank where urlid=%d' %
                        linker).fetchone()[0]
                    # Get the total number of links from the linker
                    linkingcount = self.conn.execute(
                        'select count(*) from link where fromid=%d' %
                        linker).fetchone()[0]
                    pr += 0.85 * (linkingpr / linkingcount)
                self.conn.execute(
                    'update pagerank set score=%f where urlid=%d' %
                    (pr, urlid))
            self.conn.commit()

    def pagerankscore(self, rows):
        pageranks = dict([
            (row[0],
             self.conn.execute('select score from pagerank where urlid=%d' %
                              row[0]).fetchone()[0]) for row in rows
        ])
        maxrank = max(pageranks.values())
        normalizedscores = dict([(u, float(l) / maxrank)
                                 for (u, l) in pageranks.items()])
        return normalizedscores

    def linktextscore(self, rows, wordids):
        linkscores = dict([(row[0], 0) for row in rows])
        for wordid in wordids:
            cur = self.conn.execute(
                'select link.fromid,link.toid from linkwords,link where wordid=%d and linkwords.linkid=link.rowid'
                % wordid)
            for (fromid, toid) in cur:
                if toid in linkscores:
                    pr = self.conn.execute(
                        'select score from pagerank where urlid=%d' %
                        fromid).fetchone()[0]
                    linkscores[toid] += pr
        maxscore = max(linkscores.values())
        normalizedscores = dict([(u, float(l) / maxscore)
                                 for (u, l) in linkscores.items()])
        return normalizedscores

    import nn
    mynet = nn.searchnet('nn.db')  # class-level attribute shared by all searcher instances

    def nnscore(self, rows, wordids):
        # Get unique URL IDs as an ordered list
        urlids = [urlid for urlid in set([row[0] for row in rows])]
        nnres = self.mynet.getresult(wordids, urlids)
        scores = dict([(urlids[i], nnres[i]) for i in range(len(urlids))])
        return self.normalizescores(scores)
Example No. 22
# -*- coding: utf-8 -*-
import traceback
import urllib2
from BeautifulSoup import *
from urlparse import urljoin
import sqlite3 as sqlite
import nn

mynet = nn.searchnet('output/nn.db')

# Create a list of words to ignore
ignorewords = {
    'the': 1,
    'of': 1,
    'to': 1,
    'and': 1,
    'a': 1,
    'in': 1,
    'is': 1,
    'it': 1
}


class crawler:
    # Initialize the crawler with the name of database
    def __init__(self, dbname):
        self.con = sqlite.connect(dbname)

    def __del__(self):
        self.con.close()
Example No. 23
	def __init__(self,dbname):
		self.con=sqlite.connect(dbname)
		self.mynet=nn.searchnet('nn.db')
Example No. 24
from urllib.request import urlopen, urljoin
from bs4 import BeautifulSoup
import pymysql
import re
import jieba
import time
import nn

mynet = nn.searchnet('test')


class crawler:
    # Initialize the crawler with the name of database
    def __init__(self, dbname):
        self.con = pymysql.connect(host="*****",
                                   port=3306,
                                   user="******",
                                   password="******",
                                   database=dbname,
                                   charset="utf8",
                                   use_unicode=True)
        self.cur = self.con.cursor()

    def __del__(self):
        #self.cur.close()
        self.con.close()

    def dbcommit(self):
        self.con.commit()

    #return the ID of an entry
import os
import re
import urllib2
import urlparse

from pysqlite2 import dbapi2 as sqlite
from BeautifulSoup import BeautifulSoup

import nn
net = nn.searchnet('nn.db')  # XXX: somehow train this from user clicks

ignorewords = set(['the', 'of', 'to', 'and', 'a', 'in', 'is', 'it'])


# XXX: the root page (amnoid.de) is indexed twice for some reason (e.g.
#   select * from links where toid = 2;
# shows the link 1->2 two times.
class crawler:
    def __init__(self, dbname):
        self.con = sqlite.connect(dbname)

    def __del__(self):
        self.con.close()

    def dbcommit(self):
        self.con.commit()

    def getentryid(self, table, field, value, createnew=True):
        """Returns an entry id and creates it if it is not present."""
        cur = self.con.execute('select rowid from %s where %s="%s"' %
                               (table, field, value))
Example No. 27
#!/Users/kawasakitaku/Documents/python-PVM/ln-python3.4/bin/python3.4

import searchengine as s
import nn
from flask import Flask
from flask import render_template, request
import os


mynet = nn.searchnet("nn.db")

template_dir = os.path.join(os.getcwd(),'templates')
app = Flask(__name__,template_folder=template_dir)

def makeindex(key):
  e = s.searcher('searchindex.db')
  result = e.query(key)
  List = []
  size = len(result)
  for i in range(size):
    for j in result[i]:
      List.append(e.geturlname(j))
  return List


@app.route('/',methods=['GET','POST'])
def index():
  if request.method == 'POST':
    keyword = request.form['keyword']
    res_list = makeindex(keyword)
    if keyword:
Example No. 28
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import bottle
import searchengine
import nn
import beaker.middleware
import os

mynet = nn.searchnet()

session_opts = {
    'session.type': 'file',
    'session.data_dir': './session/',
    'session.auto': True,
}
app = beaker.middleware.SessionMiddleware(bottle.app(), session_opts)

@bottle.route('/') 
def querypage():
    return bottle.template('query')

@bottle.route('/resume')
def resumepage():
    return bottle.template('resume')

@bottle.route('/query', method='POST')
def queryhandler():
    e = searchengine.searcher()
    q = bottle.request.forms.get("query")
    mywords, myurls = e.query(q)
class searcher:
    def __init__(self, dbname):
        self.con = sqlite.connect(dbname)

    def __del__(self):
        self.con.close()

    def getmatchrows(self, q):
        # Strings to build the query
        fieldlist = 'w0.urlid'
        tablelist = ''
        clauselist = ''
        wordids = []

        # Split the words on spaces
        words = q.split(' ')
        #print words  # debug
        tablenumber = 0

        for word in words:
            # Get the word ID
            wordrow = self.con.execute(
                "select rowid from wordlist where word='%s'" %
                word).fetchone()
            #print 'wordrow = %s' % wordrow  # debug
            if wordrow is not None:
                wordid = wordrow[0]
                wordids.append(wordid)
                if tablenumber > 0:
                    tablelist += ','
                    #clauselist += ' and '
                    clauselist += ' and w%d.urlid=w%d.urlid and ' % (
                        tablenumber - 1, tablenumber)
                fieldlist += ',w%d.location' % tablenumber
                tablelist += 'wordlocation w%d' % tablenumber
                clauselist += 'w%d.wordid=%d' % (tablenumber, wordid)
                tablenumber += 1
            else:
                continue

        if '' == clauselist:
            return None, None
        # Build the full query from the component parts
        # (a runtime error once seen here: sqlite3.OperationalError: near "where": syntax error)
        fullquery = 'select %s from %s where %s' % (fieldlist, tablelist,
                                                    clauselist)
        cur = self.con.execute(fullquery)
        rows = [row for row in cur]
        # print rows  # debug
        return rows, wordids

    def getscoredlist(self, rows, wordids):
        totalscores = dict((row[0], 0) for row in rows)

        # This is where the scoring functions are plugged in
        weights = [(1.0, self.frequencyscore(rows)),
                   (1.0, self.locationscore(rows)),
                   (1.0, self.pagerankscore(rows)),
                   (1.0, self.linktextscore(rows, wordids))]

        for (weight, scores) in weights:
            for url in totalscores:
                totalscores[url] += weight * scores[url]

        return totalscores

    def geturlname(self, id):
        return self.con.execute("select url from urllist where rowid=%d" %
                                id).fetchone()[0]

    def query(self, q):
        rows, wordids = self.getmatchrows(q)
        scores = self.getscoredlist(rows, wordids)
        rankedscores = sorted([(score, url)
                               for (url, score) in scores.items()],
                              reverse=1)
        for (score, urlid) in rankedscores[0:10]:
            print '%f\t%s' % (score, self.geturlname(urlid))
        return wordids, [r[1] for r in rankedscores[0:10]]

    # Normalize scores: 1 means best, 0 means worst
    def normalizescores(self, scores, smallIsBetter=0):
        vsmall = 0.00001  # Avoid division-by-zero errors
        if smallIsBetter:
            minscore = min(scores.values())
            return dict([(u, float(minscore) / max(vsmall, l))
                         for (u, l) in scores.items()])
        else:
            maxscore = max(scores.values())
            if maxscore == 0: maxscore = vsmall
            return dict([(u, float(c) / maxscore)
                         for (u, c) in scores.items()])

    def frequencyscore(self, rows):
        counts = dict([(row[0], 0) for row in rows])
        for row in rows:
            counts[row[0]] += 1
        return self.normalizescores(counts)

    def locationscore(self, rows):
        locations = dict([(row[0], 1000000) for row in rows])
        for row in rows:
            loc = sum(row[1:])
            if loc < locations[row[0]]: locations[row[0]] = loc

        return self.normalizescores(locations, smallIsBetter=1)

    def distancescore(self, rows):
        # With only one word, every url gets the same score
        if len(rows[0]) <= 2: return dict([(row[0], 1.0) for row in rows])

        # Initialize the dictionary with large values
        mindistance = dict([(row[0], 100000) for row in rows])

        for row in rows:
            dist = sum([abs(row[i] - row[i - 1]) for i in range(2, len(row))])
            if dist < mindistance[row[0]]: mindistance[row[0]] = dist
        return self.normalizescores(mindistance, smallIsBetter=1)

    def inboundlinkscore(self, rows):
        uniqueurls = set([row[0] for row in rows])
        inboundcount = dict([
            (u, self.con.execute('select count(*) from link where toid=%d' %
                                 u).fetchone()[0]) for u in uniqueurls
        ])
        return self.normalizescores(inboundcount)

    def pagerankscore(self, rows):
        pageranks = dict([
            (row[0],
             self.con.execute('select score from pagerank where urlid=%d' %
                              row[0]).fetchone()[0]) for row in rows
        ])
        maxrank = max(pageranks.values())
        normalizescores = dict([(u, float(l) / maxrank)
                                for (u, l) in pageranks.items()])
        return normalizescores

    def linktextscore(self, rows, wordids):
        linkscores = dict([(row[0], 0.00001) for row in rows])
        for wordid in wordids:
            cur = self.con.execute(
                'select link.fromid,link.toid from linkwords,link where wordid=%d and linkwords.linkid=link.rowid'
                % wordid)
            for (fromid, toid) in cur:
                if toid in linkscores:
                    pr = self.con.execute(
                        'select score from pagerank where urlid=%d' %
                        fromid).fetchone()[0]
                    linkscores[toid] += pr
        maxscore = max(linkscores.values())
        normalizescores = dict([(u, float(l) / maxscore)
                                for (u, l) in linkscores.items()])
        return normalizescores

    mynet = nn.searchnet('nn.db')

    def nnscore(self, rows, wordids):
        # Get an ordered list of unique URL ids
        urlids = [urlid for urlid in set([row[0] for row in rows])]
        nnres = self.mynet.getresult(wordids, urlids)
        scores = dict([(urlids[i], nnres[i]) for i in range(len(urlids))])
        return self.normalizescores(scores)
Example No. 30
import os
import nn

db_file = 'nn.db'
if os.path.exists(db_file):
    os.unlink(db_file)
    print 'delete db file:', db_file

mynet = nn.searchnet(db_file)
mynet.maketables()

w_world, w_river, w_bank = 101, 102, 103
u_worldbank, u_river, u_earth = 201, 202, 203

#mynet.generate_hidden_node([w_world, w_bank], [u_worldbank, u_river, u_earth])



#result = mynet.getresult([w_world, w_bank], [u_worldbank, u_river, u_earth])
#print "result:", result
#print 'show hiddennode:'
#for c in mynet.con.execute('select * from hiddennode'): print c
#print 'show wordhidden:'
#for c in mynet.con.execute('select * from wordhidden'): print c
#print 'show hiddenurl:'
#for c in mynet.con.execute('select * from hiddenurl'): print c

#mynet.train_query([w_world, w_bank], [u_worldbank, u_river, u_earth], u_worldbank)
#result = mynet.getresult([w_world, w_bank], [u_worldbank, u_river, u_earth])
#print "result:", result
Example No. 32
 def neural_network(self, rows, wordids):
     urlids = [urlid for urlid in set([row[0] for row in rows])]
     network_nel = nn.searchnet('nn.db')
     node_output = network_nel.getresult(wordids, urlids)
     net_score = dict([(urlids[i], node_output[i]) for i in range(len(urlids))])
     return self.normalizescores(net_score)
Example No. 33
# crawler.createindextables()

crawler = searchengine.crawler("searchindex.db")
# crawler.createindextables()
# pages=['http://kiwitobes.com/wiki/Categorical_list_of_programming_languages.html']
# crawler.crawl(pages)


# crawler.calculatepagerank()

# search
# e=searchengine.searcher('searchindex.db')
# e.query('function programming')

import nn

mynn = nn.searchnet("nndb.db")
# mynn.maketables()
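# getstrength(fromid, toid, layer) returns a stored connection weight; in the book's nn module,
# layer 0 is word->hidden and layer 1 is hidden->URL. setstrength inserts or updates that connection.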
kaka1 = mynn.getstrength(0, 5, 0)
kaka2 = mynn.getstrength(0, 5, 1)

mynn.setstrength(0, 5, 0, 3)
mynn.setstrength(0, 5, 1, 2)


kaka1 = mynn.getstrength(0, 5, 0)
kaka2 = mynn.getstrength(0, 5, 1)


print("Hello World")
Example No. 34
import nn

a = nn.searchnet('Truth.db')
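# this appears to be a custom variant of the book's searchnet: train(sentence, label)
# replaces trainquery(wordids, urlids, selected)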

a.train('I did not have sexual relations with that woman, Miss Kravinsky.', 0)
print("Training...")

a.train('I do not believe in god as he is who never was.', 0)
print("Training...")

a.train("I didn't see him go, I promise.", 1)
print("Training...")

a.train('He went in and kissed her ass off.', 1)
print("Training...")

a.train('I am very sorry for your loss, Johnny was like a brother to me.', 1)
print("Training...")

a.train(
    'Did you say something, mister Greene. Because, well I am sorry for saying this but you can go shit your idea.',
    1)
print("Training...")

a.train(
    'In all candor, I did not do that murder. This is my plea, not guilty.', 0)
print("Training...")

a.train(
    'Honestly, I am very nice and so good and kind. You all should learn a thing or two from me.',
    0)
import urllib2
from BeautifulSoup import *
from urlparse import urljoin
from pysqlite2 import dbapi2 as sqlite
import nn
mynet=nn.searchnet('nn.db')

# Create a list of words to ignore
ignorewords={'the':1,'of':1,'to':1,'and':1,'a':1,'in':1,'is':1,'it':1}


class crawler:
  # Initialize the crawler with the name of database
  def __init__(self,dbname):
    self.con=sqlite.connect(dbname)

  def __del__(self):
    self.con.close()

  def dbcommit(self):
    self.con.commit()

  # Auxiliary function for getting an entry id and adding
  # it if it's not present
  def getentryid(self,table,field,value,createnew=True):
    cur=self.con.execute(
    "select rowid from %s where %s='%s'" % (table,field,value))
    res=cur.fetchone()
    if res==None:
      cur=self.con.execute(
      "insert into %s (%s) values ('%s')" % (table,field,value))
Example No. 36
        pageranks=dict([(row[0],self.curs.execute('select score from pagerank where urlid=%d' % row[0]).fetchall()[0][0]) for row in rows])
        maxrank=max(pageranks.values())   # find the largest pagerank value
        for urlid in pageranks:
            pageranks[urlid] /= maxrank   # normalize
        return pageranks   # return the normalized pagerank of each url

    # Scoring function based on the neural network (learned from user click behaviour), implemented in nn.py.
    # rows is [urlid, wordlocation1, wordlocation2, wordlocation3, ...]
    def nnscore(self,rows,wordids):
        # Get an ordered list of unique url ids
        urlids=[urlid for urlid in dict([(row[0],1) for row in rows])]
        nnres=mynet.getresult(wordids,urlids)
        scores=dict([(urlids[i],nnres[i]) for i in range(len(urlids))])
        return self.normalizescores(scores)


mynet=nn.searchnet('csdn.db')
if __name__ == '__main__':
    mysearcher= searcher('csdn.db')
    searchkey = input("Search keywords> ")
    wordids,urlids=mysearcher.query(searchkey)
    # print(wordids,urlids)
    selurlid= input("Selected link id> ")
    selurlid = int(selurlid)
    mynet.trainquery(wordids, urlids,selurlid) # train on the link the user selected






Example No. 37
#coding:utf-8
import urllib2
from BeautifulSoup import *
from urlparse import urljoin
from sqlite3 import dbapi2 as sqlite
import nn
mynet=nn.searchnet('nn.db')

# Create a list of words to ignore
ignorewords={'the':1,'of':1,'to':1,'and':1,'a':1,'in':1,'is':1,'it':1}

class crawler:
  # Initialize the crawler with the name of database
  def __init__(self,dbname):
    self.con=sqlite.connect(dbname)

  def __del__(self):
    self.con.close()

  def dbcommit(self):
    self.con.commit()

  # Auxiliary function for getting an entry id and adding
  # it if it's not present
  def getentryid(self,table,field,value,createnew=True):
    cur=self.con.execute(
    "select rowid from %s where %s='%s'" % (table,field,value))
    res=cur.fetchone()
    if res==None:
      cur=self.con.execute(
      "insert into %s (%s) values ('%s')" % (table,field,value))
Example No. 38
#coding=utf-8
import nn
mynet=nn.searchnet('nn.db')  # open/create the database
#mynet.maketables()  # create the tables
wWorld,wRiver,wBank=101,102,103
uWorldBank,uRiver,uEarth=201,202,203
mynet.generatehiddennode([wWorld,wBank],[uWorldBank,uRiver,uEarth])
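# the two selects below dump the connections just created: wordhidden (word -> hidden node) and hiddenurl (hidden node -> URL)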
for c in mynet.con.execute('select * from wordhidden'): print c
for c in mynet.con.execute('select * from hiddenurl'): print c