Ejemplo n.º 1
0
def test_engine():
    """Smoke-test the engine: crawl the test site, rank the link graph,
    then print results from both lucky_search (single best hit) and
    ordered_search (rank-ordered hits).

    Fix: the original used Python-2-only print statements; every print here
    takes a single argument, so parenthesized calls behave identically on
    Python 2 and are valid Python 3.
    """
    index, graph = crawl_web('http://www.udacity.com/cs101x/index.html')
    ranks = compute_ranks(graph)
    #print(index)
    print("_+_+_+_++_+_++_+_+_+_+_++_+_+_++")
    print(lucky_search(index, ranks, 'walking'))
    #>>> https://www.udacity.com/cs101x/index.html

    print(lucky_search(index, ranks, 'kicking'))
    #>>> https://www.udacity.com/cs101x/crawling.html

    print(lucky_search(index, ranks, 'Ossifrage'))
    #>>> https://www.udacity.com/cs101x/flying.html

    # Search is case-sensitive: lowercase 'ossifrage' is not indexed.
    print(lucky_search(index, ranks, 'ossifrage'))
    #>>> None

    print("_+_+_+_++_+_++_+_+_+_+_++_+_+_++")
    print(ordered_search(index, ranks, 'to'))
    #>>> https://www.udacity.com/cs101x/index.html

    print(ordered_search(index, ranks, 'Ossifrage'))
    #>>> https://www.udacity.com/cs101x/flying.html

    print(ordered_search(index, ranks, 'crawl'))
    #>>> index crawling

    print(ordered_search(index, ranks, 'ossifrage'))
Ejemplo n.º 2
0
    def post(self):
        """Handle a search-form POST: crawl and rank the corpus, then render
        the best-ranked page for the submitted query (or a fallback hint)."""
        corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
        ranks = compute_ranks(graph)
        query = self.request.get('text')
        hit = lucky_search(corpus, ranks, query)
        if hit:
            self.render(text=query, links=hit)
        else:
            # Nothing matched — show an empty result with a friendly hint.
            self.render(text="", links="try www.google.com")
Ejemplo n.º 3
0
def results():
    """Render search results for the crawler cached under this user's
    'user_id' cookie; render an empty results page when no crawler exists."""
    crawler = memcache.get(request.cookies.get('user_id'))
    if not crawler:
        return render_template("results.html")
    # Rank the cached graph, then run an ordered search for the query term.
    search_term = str(request.args.get('q'))
    crawler.ranks = compute_ranks(crawler.graph)
    crawler.results = ordered_search(crawler.index_dict, crawler.ranks, search_term)
    return render_template("results.html", results=crawler.results)
Ejemplo n.º 4
0
    def post(self):
        """POST handler: look up the single best-ranked page for the
        submitted 'text' field and render it."""
        corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
        ranks = compute_ranks(graph)
        query = self.request.get('text')
        result = lucky_search(corpus, ranks, query)
        # Fall back to a friendly hint when nothing matched the query.
        text, links = (query, result) if result else ("", "try www.google.com")
        self.render(text=text, links=links)
Ejemplo n.º 5
0
def test():
    """Exercise LuckySearch.GET against the expected top-ranked pages.

    Fixes: Python-2-only print statements replaced with 2/3-compatible
    single-argument print calls; unused URL constants (arsenic, hummus,
    indexurl) removed.
    """
    print("Testing...")
    from studentMain import LuckySearch
    # The crawl/rank below mirrors what studentMain does at import time;
    # its results are not used directly by this test.
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    lucky = LuckySearch()
    # Each line prints True when GET returns the expected page.
    print(lucky.GET('Hummus') == kathleen)
    print(lucky.GET('the') == nickel)
    print(lucky.GET('babaganoush') == "Try searchwithpeter.info.")
    print("Finished tests.")
Ejemplo n.º 6
0
def test_suite():
    """Exercise LuckySearch.GET against the expected top-ranked pages.

    Fixes: Python-2-only print statements replaced with 2/3-compatible
    single-argument print calls; unused URL constants (arsenic, hummus,
    indexurl) removed.
    """
    print("Testing...\n")
    from studentMain import LuckySearch
    # The crawl/rank below mirrors what studentMain does at import time;
    # its results are not used directly by this test.
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    lucky = LuckySearch()
    # Each line prints True when GET returns the expected page.
    print(lucky.GET('Hummus') == kathleen)
    print(lucky.GET('the') == nickel)
    print(lucky.GET('babaganoush') == "Try searchwithpeter.info.")
    print("\nFinished tests.")
Ejemplo n.º 7
0
def test_engine():
    """Assert-based end-to-end test: crawl into a WebCorpus, rank its graph,
    then check lucky_search and ordered_search results.

    Fixes: Python-2-only print statements replaced with 2/3-compatible
    calls; `== None` replaced with the idiomatic identity test `is None`.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    wcorpus = crawl_web('http://udacity.com/cs101x/urank/index.html')
    assert isinstance(wcorpus, WebCorpus)
    ranks = compute_ranks(wcorpus.graph)
    assert lucky_search(wcorpus.index, ranks, 'Hummus') == kathleen
    assert ordered_search(wcorpus.index, ranks, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(wcorpus.index, ranks, 'the') == nickel
    assert ordered_search(wcorpus.index, ranks, 'the') == [
        nickel, arsenic, hummus, indexurl]
    # Unknown words yield None from both search flavours.
    assert lucky_search(wcorpus.index, ranks, 'babaganoush') is None
    assert ordered_search(wcorpus.index, ranks, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 8
0
def test_engine():
    """Assert-based end-to-end test: crawl, rank, then check both the lucky
    (single-hit) and ordered (ranked-list) searches.

    Fixes: Python-2-only print statements replaced with 2/3-compatible
    calls; `== None` replaced with `is None`; commented-out debug prints
    removed.
    """
    print("Testing...")
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'
    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks, 'the') == [nickel, arsenic, hummus, indexurl]
    # Unknown words yield None from both search flavours.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 9
0
def test_engine():
    """Assert-based end-to-end test: crawl, rank, then check both the lucky
    (single-hit) and ordered (ranked-list) searches.

    Fixes: Python-2-only print statements replaced with 2/3-compatible
    calls; `== None` replaced with `is None`; commented-out debug prints
    removed.
    """
    print("Testing...")
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'
    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl
    ]
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks,
                          'the') == [nickel, arsenic, hummus, indexurl]
    # Unknown words yield None from both search flavours.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 10
0
# This is the main file you should make changes
#
# To run this code locally and figure out how it works
# please download the code from our GitHub page
# http://udacity.github.io/cs101
# and run the server locally - python studentMain.py
#
from search import lucky_search
from crawler import crawl_web, compute_ranks

# Build the search corpus and link graph once at import time, then rank
# every crawled page; LuckySearch.GET reads these module-level values.
corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)

class LuckySearch(object):
    """web.py handler that serves the single best-ranked page for a query."""

    def GET(self, query):
        # Delegate to the module-level corpus/ranks built at import time.
        return lucky_search(corpus, ranks, query)

# running some tests
# NOTE: test_suite() runs unconditionally at import time, not just when
# executed as a script.
from test import test_suite
test_suite()
    
# This will be executed only if you run this code locally
# using a command: python studentMain.py

if __name__ == "__main__":
    import web
    # Route every path to the LuckySearch handler, capturing it as `query`.
    app = web.application(('/(.*)', 'LuckySearch'), globals())
    # NOTE(review): this re-crawl rebinds the module-level corpus/graph
    # already built above — it appears redundant; confirm before removing.
    corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    app.run()        
Ejemplo n.º 11
0
import crawler

# Crawl a test site, rank its pages, and answer one interactive query.
spider = crawler.Crawler('https://www.crawler-test.com/')
index, graph = spider.crawl_web()
print("initializing crawler")
ranks = crawler.compute_ranks(graph)
keyword = input("Input the keyword: ")

# Show both the single best hit and the full result set for the keyword.
print(crawler.lucky_search(index, ranks, keyword))
print(crawler.all_search(index, ranks, keyword))
Ejemplo n.º 12
0
#     if the request is '/about', the server should respond with:
#          'This is my udacious project!'
#     for all other requests, the server should respond with a normal
#     search response.
#
# Hint: you will need to add two strings to the tuple of inputs for
#    web.application(...) as well as define a new class.
#
# To test your code locally you have to install web.py and all of
# these files locally as well.

from search import lucky_search
from crawler import crawl_web, compute_ranks

# Build the search corpus and link graph once at import time, then rank
# every crawled page; LuckySearch.GET reads these module-level values.
corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)


class LuckySearch(object):
    """Serve the top-ranked page matching *query* (I'm-feeling-lucky search)."""

    def GET(self, query):
        # Uses the module-level corpus/ranks built at import time.
        return lucky_search(corpus, ranks, query)


class About(object):
    """Static handler for the '/about' route."""

    def GET(self, query):
        # The captured path segment is ignored; the page text is constant.
        return 'This is my udacious project!'


# This will be executed only if you run this code locally
# using a command: python studentMain.py
Ejemplo n.º 13
0
#!/usr/bin/python -tt

from crawler import crawl_web, compute_ranks
from search import lucky_search, lookup_best

# Crawl the local server and rank the results.  The numeric arguments are
# crawler-specific limits; their exact meaning isn't visible from here.
wc = crawl_web('localhost', 200)
ranks = compute_ranks(wc, 50)

print 'Enter 0 to exit.'
# Interactive loop: "<word>" shows the ranked result list; "<word> <anything>"
# runs a lucky (single-result) search; "0" exits.
while True:
    user_input = raw_input("Enter: keyword any_number_for_lucky_search [e.g. php]: ")
    if user_input:
        # NOTE(review): 'input' shadows the builtin; left unchanged here.
        input = user_input.split()
        word = input[0]
        # option 1 = full ranked listing; option 2 = lucky search (any
        # extra token after the keyword selects it).
        option = 1 if len(input) == 1 else 2
        if word == '0':
            break
        if option == 1:
            results = lookup_best(word, wc, ranks)
            if results:
                # Show at most the top 25 hits alongside their rank scores.
                for result in results[:25]:
                    print result, '\t'*4, ranks[result]
            else:
                print 'No results!'
        else:
            # lucky_search may return a falsy value when nothing matches.
            print lucky_search(word, wc, ranks) or 'Not lucky! -_-'
    print

print '\nThanks for using me!'
Ejemplo n.º 14
0
import crawler

# Initializing seed and maximum depth to crawl
# Initializing seed and maximum depth to crawl
seed = "http://www.udacity.com/cs101x/index.html"
default_depth = 5
default_pages = 50
# Pages linking to themselves (k = 0)
default_collusion_level = 0

# Crawl from the seed, bounded by depth and page count, then rank the
# resulting link graph with the chosen collusion level.
index, graph, pages = crawler.crawl(seed, default_depth, default_pages)
ranks = crawler.compute_ranks(graph, default_collusion_level)

print "Pages (URLs visited)"
for p in pages:
    print p

# Print the interactive menu; the command loop follows below.
print "\nType"
print "'index' to view entire index"
print "'words' to view all words in index"
print "'ranks' to view page ranks"
print "'query' to search for keyword"
print "'exit' to quit program\n"

while True:
    print ' '
    op = raw_input('-->')
    if op == 'index':
        print index
    elif op == 'words':
        for word in index:
            print word