def test(seed):
    """Smoke-run the crawl / rank / search pipeline starting from ``seed``.

    Crawls from ``seed``, ranks the resulting graph with ``page_rank``, and
    prints the ranks followed by an ordered search for 'dogs' and a lucky
    search for 'cats'.  Nothing is asserted and nothing is returned.
    """
    # Single-argument print(...) behaves the same as the print statement
    # under Python 2.
    print('Crawling web . . .')
    word_index, link_graph = crawl_web(seed)

    print('Calculating Page Rank . . .')
    ranks = page_rank(link_graph)
    print(ranks)

    dogs_hits = ordered_search(word_index, ranks, 'dogs')
    print(dogs_hits)
    cats_hit = lucky_search(word_index, ranks, 'cats')
    print(cats_hit)
def test_engine():
    """Print sample search results for the cs101x test pages.

    Crawls from the cs101x index page, ranks the graph, then prints the
    lucky_search result for four keywords and the ordered_search result
    for four keywords, with a separator line before each group.  The
    expected output recorded by the original author is kept beside each
    keyword; nothing is asserted.
    """
    separator = "_+_+_+_++_+_++_+_+_+_+_++_+_+_++"
    index, graph = crawl_web('http://www.udacity.com/cs101x/index.html')
    ranks = compute_ranks(graph)

    print(separator)
    lucky_keywords = (
        'walking',    #>>> https://www.udacity.com/cs101x/index.html
        'kicking',    #>>> https://www.udacity.com/cs101x/crawling.html
        'Ossifrage',  #>>> https://www.udacity.com/cs101x/flying.html
        'ossifrage',  #>>> None
    )
    for keyword in lucky_keywords:
        print(lucky_search(index, ranks, keyword))

    print(separator)
    ordered_keywords = (
        'to',         #>>> https://www.udacity.com/cs101x/index.html
        'Ossifrage',  #>>> https://www.udacity.com/cs101x/flying.html
        'crawl',      #>>> index crawling
        'ossifrage',  # (no expected output recorded)
    )
    for keyword in ordered_keywords:
        print(ordered_search(index, ranks, keyword))
def test_engine():
    """Check lucky_search and ordered_search against the urank test pages.

    Crawls from the urank index page and asserts the expected results for
    the keywords 'Hummus' and 'the', and that both searches return None
    for 'babaganoush'.

    Raises:
        AssertionError: on the first result that differs from expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The seed page is the same URL as indexurl, so reuse the constant
    # instead of repeating the literal.
    corpus = crawl_web(indexurl)

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [
        nickel, arsenic, hummus, indexurl]
    # None is a singleton: compare by identity rather than equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")
def post(self):
    """Render the lucky-search result for the submitted 'text' field.

    Crawls from the urank index page, ranks the graph, reads the 'text'
    value from ``self.request`` and runs lucky_search with it.  A truthy
    result is rendered together with the query; otherwise an empty text
    and a "try www.google.com" hint are rendered.
    """
    # NOTE(review): the crawl and ranking run on every call -- confirm that
    # this is intended rather than building the corpus once.
    corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)

    query = self.request.get('text')
    result = lucky_search(corpus, ranks, query)
    if result:
        self.render(text=query, links=result)
        return
    self.render(text="", links="try www.google.com")
def post(self):
    """Look up the request's 'text' value and render the best match.

    The corpus is crawled from the urank index page and ranked before the
    query is read from ``self.request``.  ``self.render`` is called once:
    with the query and its lucky_search result when that result is
    truthy, or with an empty text and a "try www.google.com" hint when it
    is not.
    """
    seed_url = 'http://udacity.com/cs101x/urank/index.html'
    corpus, graph = crawl_web(seed_url)
    ranks = compute_ranks(graph)
    query = self.request.get('text')
    result = lucky_search(corpus, ranks, query)

    # Choose the render arguments first so render is called in one place.
    if result:
        text, links = query, result
    else:
        text, links = "", "try www.google.com"
    self.render(text=text, links=links)
def test_engine():
    """Exercise template: crawl the urank pages and check search results.

    Crawls from the urank index page, leaves a placeholder where the
    corpus is meant to be saved to ``fname``, and asserts the expected
    lucky_search / ordered_search results for 'Hummus', 'the' and
    'babaganoush' (the last is expected to give None from both searches).

    Raises:
        AssertionError: on the first result that differs from expectation.
    """
    print("Testing...")
    kathleen = "http://udacity.com/cs101x/urank/kathleen.html"
    nickel = "http://udacity.com/cs101x/urank/nickel.html"
    arsenic = "http://udacity.com/cs101x/urank/arsenic.html"
    hummus = "http://udacity.com/cs101x/urank/hummus.html"
    indexurl = "http://udacity.com/cs101x/urank/index.html"

    # The seed page is the same URL as indexurl; reuse the constant.
    corpus = crawl_web(indexurl)

    fname = "corpus.pkl"  # target file for the code to be filled in below
    # YOUR CODE HERE

    assert lucky_search(corpus, "Hummus") == kathleen
    assert ordered_search(corpus, "Hummus") == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, "the") == nickel
    assert ordered_search(corpus, "the") == [
        nickel, arsenic, hummus, indexurl]
    # Identity comparison is the idiomatic test for the None singleton.
    assert lucky_search(corpus, "babaganoush") is None
    assert ordered_search(corpus, "babaganoush") is None
    print("Finished tests.")
def test_engine():
    """Assert ranked search results for the urank test pages.

    Crawls from the urank index page, computes ranks for the graph, and
    checks lucky_search / ordered_search for 'Hummus' and 'the', plus that
    both return None for 'babaganoush'.

    Raises:
        AssertionError: on the first result that differs from expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The URL constants are plain strings, so defining them before the
    # crawl changes nothing and lets the seed reuse indexurl.
    index, graph = crawl_web(indexurl)
    ranks = compute_ranks(graph)

    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks, 'the') == [
        nickel, arsenic, hummus, indexurl]
    # None is a singleton: compare by identity rather than equality.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
def test_engine():
    """Verify lucky_search and ordered_search on the urank test pages.

    After crawling from the urank index page and ranking the graph, the
    function asserts the expected best match and full ordering for the
    keywords 'Hummus' and 'the', and that 'babaganoush' gives None from
    both searches.

    Raises:
        AssertionError: on the first result that differs from expectation.
    """
    print("Testing...")
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)

    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # Expected orderings, named so each assert reads on one line.
    hummus_order = [kathleen, nickel, arsenic, hummus, indexurl]
    the_order = [nickel, arsenic, hummus, indexurl]

    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == hummus_order
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks, 'the') == the_order
    # Use identity, not equality, when checking for None.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
def GET(self, query):
    """Return whatever lucky_search yields for ``query``.

    ``corpus`` and ``ranks`` are not parameters; they are resolved from
    the enclosing scope at call time.
    """
    return lucky_search(corpus, ranks, query)
def test_engine():
    """Crawl the urank pages, pickle the corpus, and check search results.

    The corpus crawled from the urank index page is written to
    'corpus.pkl' with pickle.  A failed write is reported on stdout and
    does not stop the search assertions that follow.

    Raises:
        AssertionError: on the first search result that differs from
            expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The seed page is the same URL as indexurl; reuse the constant.
    corpus = crawl_web(indexurl)

    fname = 'corpus.pkl'
    try:
        # Binary mode avoids newline translation of the pickle stream and
        # is required for every pickle protocol other than 0.
        with open(fname, 'wb') as fout:
            pickle.dump(corpus, fout)
        print("Successfully wrote to " + fname)
    except IOError as e:
        # Best effort: report the failure and carry on with the checks.
        print("Most odacious! Cannot write to corpus " + str(e))

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [
        nickel, arsenic, hummus, indexurl]
    # None is a singleton: compare by identity rather than equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")


test_engine()
def GET(self, query):
    """Return the lucky_search result for ``query``, or a fallback hint.

    ``corpus`` and ``ranks`` are resolved from the enclosing scope.  Only
    a result of None is replaced by the hint string; any other value is
    returned unchanged.
    """
    result = lucky_search(corpus, ranks, query)
    # None is a singleton: test it by identity rather than equality.
    if result is None:
        return "Try searchwithpeter.info"
    return result
def GET(self, query):
    """Return the lucky_search result for ``query``, or a fallback hint.

    ``corpus`` and ``ranks`` are resolved from the enclosing scope.  Any
    falsy result (not only None) is replaced by the hint string.
    """
    return lucky_search(corpus, ranks, query) or "Try searchwithpeter.info."
import pickle

from search import lucky_search, ordered_search

# NOTE(review): crawl_web is called below but is not imported in this
# snippet -- confirm it is brought into scope elsewhere.


def test_engine():
    """Crawl the urank pages, pickle the corpus, and check search results.

    The corpus crawled from the urank index page is written to
    'corpus.pkl' with pickle.  A failed write is reported on stdout and
    does not stop the search assertions that follow.

    Raises:
        AssertionError: on the first search result that differs from
            expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The seed page is the same URL as indexurl; reuse the constant.
    corpus = crawl_web(indexurl)

    fname = 'corpus.pkl'
    try:
        # Binary mode avoids newline translation of the pickle stream and
        # is required for every pickle protocol other than 0.
        with open(fname, 'wb') as fout:
            pickle.dump(corpus, fout)
        print("Pickled file to " + fname)
    except IOError as e:
        # Best effort: report the failure and carry on with the checks.
        print("Failed to write to file: " + str(e))

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [
        nickel, arsenic, hummus, indexurl]
    # None is a singleton: compare by identity rather than equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")


test_engine()
from crawler import crawl_web
from search import urank, lucky_search, lookup

# Test run: crawl from the seed page, rank the graph, then answer a single
# keyword read from the console with both a lucky search and a raw lookup.
index, graph = crawl_web('www.dmoz.org')  # seed page
ranks = urank(graph)

keyword = raw_input('Enter search keyword: ')
print(lucky_search(index, ranks, keyword))
print(lookup(index, keyword))

# Stuff to add:
#   1. Support for multiple keywords
#   2. Integration with web page
#   3. Removal of useless keywords and punctuation
#   4. Add meaning to punctuation
#   5. Listing of all results according to their rank
#   ****6. A better page rank algorithm****
#
# -End-
#!/usr/bin/python -tt
"""Interactive console loop over the crawler and search modules.

A single keyword lists up to 25 lookup_best results with their ranks; a
keyword followed by any second token runs lucky_search instead; entering
0 leaves the loop.
"""
from crawler import crawl_web, compute_ranks
from search import lucky_search, lookup_best

wc = crawl_web('localhost', 200)
ranks = compute_ranks(wc, 50)

print('Enter 0 to exit.')
while True:
    user_input = raw_input("Enter: keyword any_number_for_lucky_search [e.g. php]: ")

    # Test the split tokens rather than the raw string: whitespace-only
    # input is truthy but splits to [], and indexing it raised IndexError.
    # The list is also no longer named `input`, which shadowed the builtin.
    tokens = user_input.split()
    if not tokens:
        continue

    word = tokens[0]
    if word == '0':
        break

    if len(tokens) == 1:
        results = lookup_best(word, wc, ranks)
        if results:
            for result in results[:25]:
                # Same text the Python 2 statement
                # `print result, '\t'*4, ranks[result]` emits: a space
                # after the result, none after the run of tabs.
                print('%s %s%s' % (result, '\t' * 4, ranks[result]))
        else:
            print('No results!')
    else:
        print(lucky_search(word, wc, ranks) or 'Not lucky! -_-')

    # Blank line after each handled query.
    # NOTE(review): the original nesting of this blank print could not be
    # recovered from the collapsed source -- confirm it belongs here.
    print('')

print('\nThanks for using me!')
def GET(self, query):
    """Return whatever lucky_search yields for ``query``.

    ``corpus`` is not a parameter; it is resolved from the enclosing scope
    at call time.
    """
    best_match = lucky_search(corpus, query)
    return best_match
def GET(self, query):
    """Return the lucky_search result for ``query``, or a fallback hint.

    ``corpus`` is resolved from the enclosing scope.  Only a result of
    None is replaced by the hint string; any other value is returned
    unchanged.
    """
    res = lucky_search(corpus, query)
    # None is a singleton: test it by identity rather than equality.
    if res is None:
        return 'Try searchwithpeter.info'
    return res