def nnscore(self, rows, wordids):
    """Score the URLs found in ``rows`` with the neural network.

    rows    -- iterable of sequences whose first element is a url id.
    wordids -- word ids of the query, handed to the network unchanged.

    Returns whatever ``self.normalizescores`` produces for the
    ``{urlid: network_output}`` mapping built here.
    """
    # Collapse the first column of every row into a list of distinct url ids.
    unique_ids = list(set(row[0] for row in rows))

    # The network is expected to return one number per url id, in the same
    # order as the ids it was given.
    outputs = _nn.searchnet().getresult(wordids, unique_ids)

    # Pair each url id with the output found at the same position.
    scores = {}
    for position, urlid in enumerate(unique_ids):
        scores[urlid] = outputs[position]

    return self.normalizescores(scores)
Example #2
0
 def nnscore(self, rows, wordids):
     """Rank the URLs referenced by ``rows`` using the neural network.

     rows    -- iterable of sequences; element 0 of each is a url id.
     wordids -- query word ids, passed straight through to the network.

     Returns the result of ``self.normalizescores`` applied to a
     ``{urlid: network_output}`` dictionary.
     """
     # Distinct url ids taken from the first column of the rows.
     distinct = set(row[0] for row in rows)
     id_list = list(distinct)

     # One output per url id is expected, aligned with the order of id_list.
     net = _nn.searchnet()
     outputs = net.getresult(wordids, id_list)

     # Match ids and outputs by position.
     raw_scores = dict(
         (urlid, outputs[idx]) for idx, urlid in enumerate(id_list)
     )
     return self.normalizescores(raw_scores)
Example #3
0
from bs4 import *
from urlparse import urljoin

from google.appengine.ext import db
from google.appengine.api import users
from google.appengine.ext.db import stats

import _searcher
import _nn
import stopwords  #List of words to ignore #Can use TF-IDF instead
import StemmerFile
import HTML

# Global Variables
# Module-level state shared by the code below; everything here is created
# once, when the module is imported.
mys = _searcher.searcher()  # shared searcher instance from the _searcher module
myn = _nn.searchnet()  # shared neural-network instance from the _nn module

# NOTE(review): presumably hold the word ids / url ids of the most recent
# query -- their use is not visible here, confirm against the callers.
wordids = []
urlids = []
mycheck = 0  # NOTE(review): integer flag or counter; purpose not visible here
porter = StemmerFile.PorterStemmer()  # Porter stemmer instance from StemmerFile

#[1]CRAWLING and BUILDING INDEX-----------------------------------------------------------------------


class Urllist(db.Model):
    """Datastore model with one entity per URL: address, title, description, date."""
    # Address of the page; explicitly marked as indexed.
    url = db.StringProperty(indexed=True)
    # Page title; multiline=True allows line breaks in the stored value.
    title = db.StringProperty(multiline=True)
    # Page description; line breaks are allowed as well.
    description = db.StringProperty(multiline=True)
    # Timestamp filled in automatically when the entity is first stored.
    date = db.DateTimeProperty(auto_now_add=True)
from urlparse import urljoin

from google.appengine.ext import db
from google.appengine.api import users
from google.appengine.ext.db import stats


import _searcher
import _nn
import stopwords         #List of words to ignore #Can use TF-IDF instead
import StemmerFile
import HTML

# Global Variables
# Module-level state shared by the code below; everything here is created
# once, when the module is imported.
mys=_searcher.searcher()  # shared searcher instance from the _searcher module
myn=_nn.searchnet()  # shared neural-network instance from the _nn module

# NOTE(review): presumably hold the word ids / url ids of the most recent
# query -- their use is not visible here, confirm against the callers.
wordids=[]
urlids=[]
mycheck=0  # NOTE(review): integer flag or counter; purpose not visible here
porter=StemmerFile.PorterStemmer()  # Porter stemmer instance from StemmerFile

#[1]CRAWLING and BUILDING INDEX-----------------------------------------------------------------------

class Urllist(db.Model):
    """Datastore model with one entity per URL: address, title, description, date."""
    # Address of the page; explicitly marked as indexed.
    url = db.StringProperty(indexed=True)
    # Page title; multiline=True allows line breaks in the stored value.
    title = db.StringProperty(multiline=True)
    # Page description; line breaks are allowed as well.
    description = db.StringProperty(multiline=True)
    # Timestamp filled in automatically when the entity is first stored.
    date = db.DateTimeProperty(auto_now_add=True)
    
class Wordlist(db.Model):