def nnscore(self, rows, wordids):
    """Score each distinct url id in *rows* with the neural network.

    rows    -- iterable of result rows; element 0 of each row is used
               as the url id.
    wordids -- word ids of the query, passed unchanged to the network.

    Returns whatever ``self.normalizescores`` produces for a
    ``{urlid: network_output}`` dict.
    """
    # Collapse duplicate url ids; the list fixes an order for the call below.
    unique_urlids = list(set(row[0] for row in rows))

    # Per the original note, the network returns plain decimal numbers in the
    # same order as the url ids it was given.
    net_output = _nn.searchnet().getresult(wordids, unique_urlids)

    # Pair every url id with the output found at the same position.
    url_scores = {}
    for position, urlid in enumerate(unique_urlids):
        url_scores[urlid] = net_output[position]

    return self.normalizescores(url_scores)
def nnscore(self, rows, wordids):
    """Rank the distinct url ids found in *rows* using the neural network.

    rows    -- iterable of result rows; only ``row[0]`` (the url id) is read.
    wordids -- query word ids, handed to the network as-is.

    The ``{urlid: network_output}`` mapping is passed through
    ``self.normalizescores`` and that result is returned.
    """
    # De-duplicate the url ids and freeze them into a list so positions in
    # the network output can be matched back to them.
    distinct_ids = list(set(row[0] for row in rows))

    # Original note: output comes back in the same order as the ids, as
    # decimal numbers only.
    outputs = _nn.searchnet().getresult(wordids, distinct_ids)

    score_by_url = {}
    for idx, urlid in enumerate(distinct_ids):
        score_by_url[urlid] = outputs[idx]

    return self.normalizescores(score_by_url)
from bs4 import *
from urlparse import urljoin
from google.appengine.ext import db
from google.appengine.api import users
from google.appengine.ext.db import stats
import _searcher
import _nn
import stopwords  # List of words to ignore
# Can use TF-IDF instead
import StemmerFile
import HTML

# Global Variables
# Everything below is created once, at import time, and shared module-wide.
mys = _searcher.searcher()  # shared searcher instance
myn = _nn.searchnet()  # shared neural-network instance
wordids = []  # NOTE(review): module-level mutable list; confirm who fills it
urlids = []  # NOTE(review): module-level mutable list; confirm who fills it
mycheck = 0  # NOTE(review): purpose not visible in this section
porter = StemmerFile.PorterStemmer()  # shared Porter stemmer instance


#[1]CRAWLING and BUILDING INDEX-----------------------------------------------------------------------
class Urllist(db.Model):
    """App Engine datastore model describing one URL.

    Stores the URL itself plus a title, a description and a creation
    timestamp. Presumably one entity per crawled page -- confirm against
    the crawler code.
    """
    # The URL string; explicitly marked as indexed.
    url = db.StringProperty(indexed=True)
    # multiline=True lets the stored string contain line breaks.
    title = db.StringProperty(multiline=True)
    description = db.StringProperty(multiline=True)
    # auto_now_add=True: the datastore fills this in when the entity is
    # first created.
    date = db.DateTimeProperty(auto_now_add=True)
from urlparse import urljoin
from google.appengine.ext import db
from google.appengine.api import users
from google.appengine.ext.db import stats
import _searcher
import _nn
import stopwords #List of words to ignore
#Can use TF-IDF instead
import StemmerFile
import HTML

# Global Variables
# Everything below is created once, at import time, and shared module-wide.
mys=_searcher.searcher()  # shared searcher instance
myn=_nn.searchnet()  # shared neural-network instance
wordids=[]  # NOTE(review): module-level mutable list; confirm who fills it
urlids=[]  # NOTE(review): module-level mutable list; confirm who fills it
mycheck=0  # NOTE(review): purpose not visible in this section
porter=StemmerFile.PorterStemmer()  # shared Porter stemmer instance

#[1]CRAWLING and BUILDING INDEX-----------------------------------------------------------------------
class Urllist(db.Model):
    """App Engine datastore model describing one URL.

    Stores the URL itself plus a title, a description and a creation
    timestamp. Presumably one entity per crawled page -- confirm against
    the crawler code.
    """
    # The URL string; explicitly marked as indexed.
    url = db.StringProperty(indexed=True)
    # multiline=True lets the stored string contain line breaks.
    title = db.StringProperty(multiline=True)
    description = db.StringProperty(multiline=True)
    # auto_now_add=True: the datastore fills this in when the entity is
    # first created.
    date = db.DateTimeProperty(auto_now_add=True)

class Wordlist(db.Model):