Example #1

# `cache` is assumed to hold pre-fetched pages (url -> html content); an
# empty placeholder is defined here so the example is self-contained.
cache = {}

class WebHunter:
    def __init__(self):
        # IndexHunt, GraphHunt (and LinkFinder, used below) are assumed to
        # be defined elsewhere: backing stores for the keyword index and
        # the link graph.
        #db = Connection().web_hunter
        self.index, self.graph = IndexHunt(), GraphHunt()

        self.crawl_web('http://udacity.com/cs101x/urank/index.html')
        self.ranks = {}
        self.compute_ranks()
        #db.index.insert(self.index)

    def qsort(self, tosort, ranks):
        # Quicksort that orders URLs by descending rank, so the
        # highest-ranked pages come first.
        if tosort == []:
            return []
        else:
            pivot = tosort[0]
            lesser = self.qsort([x for x in tosort[1:] if ranks[x] < ranks[pivot]], ranks)
            greater = self.qsort([x for x in tosort[1:] if ranks[x] >= ranks[pivot]], ranks)
            return greater + [pivot] + lesser

    def ordered_search(self, keyword):
        # Return the matching URLs sorted best-first, or None if no match.
        keyword = keyword.lower()
        links = self.index.find(keyword)
        if links:
            return self.qsort(links, self.ranks)
        return None

    def get_page(self, url):
        # Look the page up in the module-level `cache` of pre-fetched
        # content; unknown URLs yield an empty page.
        if url in cache:
            return cache[url]
        return ""

    def union(self, a, b):
        # In-place union: append each element of b to a unless already present.
        for e in b:
            if e not in a:
                a.append(e)

    def add_page_to_index(self, url, content):
        # Index every whitespace-separated word on the page.
        words = content.split()
        for word in words:
            self.add_to_index(word, url)

    def add_to_index(self, keyword, url):
        links = self.index.find(keyword)
        if links:
            if url not in links:
                links.append(url)
                self.index.update(keyword, links)
        else:
            # First occurrence of the keyword: create a new index entry.
            self.index.insert(keyword, url)

    def lookup(self, keyword):
        # Unordered lookup: the raw list of URLs for a keyword, or None.
        links = self.index.find(keyword)
        return links or None

    def crawl_web(self, seed):
        # Stack-based (LIFO) crawl from the seed URL, building the keyword
        # index and the link graph as pages are visited.
        tocrawl = [seed]
        crawled = []
        while tocrawl:
            page = tocrawl.pop()
            if page not in crawled:
                content = self.get_page(page)
                # LinkFinder is assumed to parse the HTML and return the
                # page's outgoing links, its text content, and its title.
                outlinks, content, title = LinkFinder().start_parsing(content)
                self.add_page_to_index(page, content)
                self.graph.insert(page, outlinks, title, content)
                self.union(tocrawl, outlinks)
                crawled.append(page)

    def compute_ranks(self):
        # Simplified PageRank:
        #   rank(p) = (1 - d) / npages + d * sum(rank(q) / outlinks(q))
        # summed over all pages q that link to p, iterated numloops times.
        d = 0.8  # damping factor
        numloops = 10

        npages = self.graph.size()

        # Start every page with an equal share of the total rank.
        for page in self.graph.find():
            self.ranks[page["url"]] = 1.0 / npages

        for _ in range(numloops):
            newranks = {}
            graph = self.graph.find()  # assumed to return a list of page records
            for page in graph:
                newrank = (1 - d) / npages
                for node in graph:
                    if page["url"] in node["links"]:
                        newrank += d * (self.ranks[node["url"]] / len(node["links"]))
                newranks[page["url"]] = newrank
            self.ranks = newranks
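
For context, here is a minimal usage sketch under the assumptions noted in the comments above (IndexHunt, GraphHunt, LinkFinder, and a populated cache are all external to this example); it is illustrative, not part of the original listing:

# Hypothetical usage: crawling and rank computation happen in __init__.
hunter = WebHunter()

# ordered_search returns URLs best-first, or None if nothing matches.
results = hunter.ordered_search('python')
if results:
    for url in results:
        print(url, hunter.ranks[url])
else:
    print('no matching pages')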