def test_engine(): index, graph = crawl_web('http://www.udacity.com/cs101x/index.html') ranks = compute_ranks(graph) #print index print "_+_+_+_++_+_++_+_+_+_+_++_+_+_++" print lucky_search(index, ranks, 'walking') #>>> https://www.udacity.com/cs101x/index.html print lucky_search(index, ranks, 'kicking') #>>> https://www.udacity.com/cs101x/crawling.html print lucky_search(index, ranks, 'Ossifrage') #>>> https://www.udacity.com/cs101x/flying.html print lucky_search(index, ranks, 'ossifrage') #>>> None print "_+_+_+_++_+_++_+_+_+_+_++_+_+_++" print ordered_search(index, ranks, 'to') #>>> https://www.udacity.com/cs101x/index.html print ordered_search(index, ranks, 'Ossifrage') #>>> https://www.udacity.com/cs101x/flying.html print ordered_search(index, ranks, 'crawl') #>>> index crawling print ordered_search(index, ranks, 'ossifrage')
def post(self):
    """Handle a search form POST: crawl, rank, and render the lucky result.

    Renders a fallback suggestion when no page matches the query.
    """
    corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    query = self.request.get('text')
    result = lucky_search(corpus, ranks, query)
    if not result:
        self.render(text="", links="try www.google.com")
    else:
        self.render(text=query, links=result)
def results():
    """Render ranked search results for the crawler cached under this user's id.

    Falls back to an empty results page when no crawler is in memcache.
    """
    user_id = request.cookies.get('user_id')
    crawler = memcache.get(user_id)
    if crawler:
        search_term = str(request.args.get('q'))
        crawler.ranks = compute_ranks(crawler.graph)
        crawler.results = ordered_search(crawler.index_dict, crawler.ranks,
                                         search_term)
        return render_template("results.html", results=crawler.results)
    else:
        return render_template("results.html")
def post(self):
    """Serve a search POST request with the single best-ranked match.

    When the query matches nothing, render a pointer to an external engine.
    """
    corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    query = self.request.get('text')
    result = lucky_search(corpus, ranks, query)
    if result:
        self.render(text=query, links=result)
    else:
        self.render(text="", links="try www.google.com")
def test(): print "Testing..." from studentMain import LuckySearch index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html') ranks = compute_ranks(graph) kathleen = 'http://udacity.com/cs101x/urank/kathleen.html' nickel = 'http://udacity.com/cs101x/urank/nickel.html' arsenic = 'http://udacity.com/cs101x/urank/arsenic.html' hummus = 'http://udacity.com/cs101x/urank/hummus.html' indexurl = 'http://udacity.com/cs101x/urank/index.html' lucky = LuckySearch() print lucky.GET('Hummus') == kathleen print lucky.GET('the') == nickel print lucky.GET('babaganoush') == "Try searchwithpeter.info." print "Finished tests."
def test_suite(): print "Testing...\n" from studentMain import LuckySearch index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html') ranks = compute_ranks(graph) kathleen = 'http://udacity.com/cs101x/urank/kathleen.html' nickel = 'http://udacity.com/cs101x/urank/nickel.html' arsenic = 'http://udacity.com/cs101x/urank/arsenic.html' hummus = 'http://udacity.com/cs101x/urank/hummus.html' indexurl = 'http://udacity.com/cs101x/urank/index.html' lucky = LuckySearch() print lucky.GET('Hummus') == kathleen print lucky.GET('the') == nickel print lucky.GET('babaganoush') == "Try searchwithpeter.info." print "\nFinished tests."
def test_engine(): print "Testing..." kathleen = 'http://udacity.com/cs101x/urank/kathleen.html' nickel = 'http://udacity.com/cs101x/urank/nickel.html' arsenic = 'http://udacity.com/cs101x/urank/arsenic.html' hummus = 'http://udacity.com/cs101x/urank/hummus.html' indexurl = 'http://udacity.com/cs101x/urank/index.html' wcorpus = crawl_web('http://udacity.com/cs101x/urank/index.html') assert isinstance(wcorpus, WebCorpus) ranks = compute_ranks(wcorpus.graph) assert lucky_search(wcorpus.index, ranks, 'Hummus') == kathleen assert ordered_search(wcorpus.index, ranks, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl] assert lucky_search(wcorpus.index, ranks, 'the') == nickel assert ordered_search(wcorpus.index, ranks, 'the') == [nickel, arsenic, hummus, indexurl] assert lucky_search(wcorpus.index, ranks, 'babaganoush') == None assert ordered_search(wcorpus.index, ranks, 'babaganoush') == None print "Finished tests."
def test_engine(): print "Testing..." index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html') ranks = compute_ranks(graph) kathleen = 'http://udacity.com/cs101x/urank/kathleen.html' nickel = 'http://udacity.com/cs101x/urank/nickel.html' arsenic = 'http://udacity.com/cs101x/urank/arsenic.html' hummus = 'http://udacity.com/cs101x/urank/hummus.html' indexurl = 'http://udacity.com/cs101x/urank/index.html' # print lucky_search(index, ranks, 'Hummus') assert lucky_search(index, ranks, 'Hummus') == kathleen #print ordered_search(index, ranks, 'Hummus') assert ordered_search(index, ranks, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl] #print lucky_search(index, ranks, 'the') assert lucky_search(index, ranks, 'the') == nickel #print ordered_search(index, ranks, 'the') assert ordered_search(index, ranks, 'the') == [nickel, arsenic, hummus, indexurl] #print lucky_search(index, ranks, 'babaganoush') assert lucky_search(index, ranks, 'babaganoush') == None assert ordered_search(index, ranks, 'babaganoush') == None print "Finished tests."
def test_engine(): print "Testing..." index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html') ranks = compute_ranks(graph) kathleen = 'http://udacity.com/cs101x/urank/kathleen.html' nickel = 'http://udacity.com/cs101x/urank/nickel.html' arsenic = 'http://udacity.com/cs101x/urank/arsenic.html' hummus = 'http://udacity.com/cs101x/urank/hummus.html' indexurl = 'http://udacity.com/cs101x/urank/index.html' # print lucky_search(index, ranks, 'Hummus') assert lucky_search(index, ranks, 'Hummus') == kathleen #print ordered_search(index, ranks, 'Hummus') assert ordered_search(index, ranks, 'Hummus') == [ kathleen, nickel, arsenic, hummus, indexurl ] #print lucky_search(index, ranks, 'the') assert lucky_search(index, ranks, 'the') == nickel #print ordered_search(index, ranks, 'the') assert ordered_search(index, ranks, 'the') == [nickel, arsenic, hummus, indexurl] #print lucky_search(index, ranks, 'babaganoush') assert lucky_search(index, ranks, 'babaganoush') == None assert ordered_search(index, ranks, 'babaganoush') == None print "Finished tests."
# This is the main file you should make changes
#
# To run this code locally and figure out how it works
# please download the code from our GitHub page
# http://udacity.github.io/cs101
# and run the server locally - python studentMain.py
#
from search import lucky_search
from crawler import crawl_web, compute_ranks

# Build the corpus and page ranks once at import time; LuckySearch.GET
# reuses these module-level results for every request.
corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)


class LuckySearch(object):
    def GET(self, query):
        """Return the single best-ranked page for query (or its fallback)."""
        result = lucky_search(corpus, ranks, query)
        return result

# running some tests
from test import test_suite
test_suite()

# This will be executed only if you run this code locally
# using a command: python studentMain.py
if __name__ == "__main__":
    import web
    app = web.application(('/(.*)', 'LuckySearch'), globals())
    # Fix: the original re-crawled the web here, but corpus and graph are
    # already built at import time above — the duplicate crawl was redundant.
    app.run()
import crawler

# Crawl the test site, then answer a single interactive query with both
# the lucky (single-result) and exhaustive search modes.
c = crawler.Crawler('https://www.crawler-test.com/')
index, graph = c.crawl_web()
print("initializing crawler")
ranks = crawler.compute_ranks(graph)
keyword = input("Input the keyword: ")
print(crawler.lucky_search(index, ranks, keyword))
print(crawler.all_search(index, ranks, keyword))
# if the request is '/about', the server should respond with:
# 'This is my udacious project!'
# for all other requests, the server should respond with a normal
# search response.
#
# Hint: you will need to add two strings to the tuple of inputs for
# web.application(...) as well as define a new class.
#
# To test your code locally you have to install web.py and all of
# these files locally as well.
from search import lucky_search
from crawler import crawl_web, compute_ranks

# Corpus and ranks are computed once at import time and shared by handlers.
corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)


class LuckySearch(object):
    def GET(self, query):
        """Return the top-ranked page for the requested query."""
        result = lucky_search(corpus, ranks, query)
        return result


class About(object):
    def GET(self, query):
        """Serve the static /about response."""
        return 'This is my udacious project!'

# This will be executed only if you run this code locally
# using a command: python studentMain.py
#!/usr/bin/python -tt from crawler import crawl_web, compute_ranks from search import lucky_search, lookup_best wc = crawl_web('localhost', 200) ranks = compute_ranks(wc, 50) print 'Enter 0 to exit.' while True: user_input = raw_input("Enter: keyword any_number_for_lucky_search [e.g. php]: ") if user_input: input = user_input.split() word = input[0] option = 1 if len(input) == 1 else 2 if word == '0': break if option == 1: results = lookup_best(word, wc, ranks) if results: for result in results[:25]: print result, '\t'*4, ranks[result] else: print 'No results!' else: print lucky_search(word, wc, ranks) or 'Not lucky! -_-' print print '\nThanks for using me!'
import crawler # Initializing seed and maximum depth to crawl seed = "http://www.udacity.com/cs101x/index.html" default_depth = 5 default_pages = 50 # Pages linking to themselves (k = 0) default_collusion_level = 0 index, graph, pages = crawler.crawl(seed, default_depth, default_pages) ranks = crawler.compute_ranks(graph, default_collusion_level) print "Pages (URLs visited)" for p in pages: print p print "\nType" print "'index' to view entire index" print "'words' to view all words in index" print "'ranks' to view page ranks" print "'query' to search for keyword" print "'exit' to quit program\n" while True: print ' ' op = raw_input('-->') if op == 'index': print index elif op == 'words': for word in index: print word