Ejemplo n.º 1
0
def test(seed):
    """Crawl from ``seed``, compute page ranks, and print sample searches.

    Prints a progress message before each phase, then the value returned by
    ``page_rank``, the ``ordered_search`` result for 'dogs', and the
    ``lucky_search`` result for 'cats'.
    """
    print('Crawling web . . .')
    index, graph = crawl_web(seed)

    print('Calculating Page Rank . . .')
    ranks = page_rank(graph)
    print(ranks)

    dogs_result = ordered_search(index, ranks, 'dogs')
    print(dogs_result)
    cats_result = lucky_search(index, ranks, 'cats')
    print(cats_result)
Ejemplo n.º 2
0
def test_engine():
    """Crawl the cs101x sample site and print several search results.

    Prints a separator line, four ``lucky_search`` results, the separator
    again, and four ``ordered_search`` results, in that order.
    """
    index, graph = crawl_web('http://www.udacity.com/cs101x/index.html')
    ranks = compute_ranks(graph)
    separator = "_+_+_+_++_+_++_+_+_+_+_++_+_+_++"

    # Expected output noted by the original author, in order:
    #   'walking'   -> https://www.udacity.com/cs101x/index.html
    #   'kicking'   -> https://www.udacity.com/cs101x/crawling.html
    #   'Ossifrage' -> https://www.udacity.com/cs101x/flying.html
    #   'ossifrage' -> None
    print(separator)
    for keyword in ('walking', 'kicking', 'Ossifrage', 'ossifrage'):
        print(lucky_search(index, ranks, keyword))

    # Expected output noted by the original author, in order:
    #   'to'        -> https://www.udacity.com/cs101x/index.html
    #   'Ossifrage' -> https://www.udacity.com/cs101x/flying.html
    #   'crawl'     -> index crawling
    #   'ossifrage' -> (no expectation recorded)
    print(separator)
    for keyword in ('to', 'Ossifrage', 'crawl', 'ossifrage'):
        print(ordered_search(index, ranks, keyword))
Ejemplo n.º 3
0
def test_engine():
    """Crawl the urank sample site and check the two search functions.

    Asserts the ``lucky_search`` and ``ordered_search`` results for the
    keywords 'Hummus', 'the' and 'babaganoush' against the expected URLs
    (or ``None`` for the keyword with no expected match).

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The crawl is seeded with the index page (same URL as ``indexurl``).
    corpus = crawl_web(indexurl)

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 4
0
def test_engine():
    """Crawl the urank sample site and check the two search functions.

    Asserts the ``lucky_search`` and ``ordered_search`` results for the
    keywords 'Hummus', 'the' and 'babaganoush' against the expected URLs
    (or ``None`` for the keyword with no expected match).

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # The crawl is seeded with the index page (same URL as ``indexurl``).
    corpus = crawl_web(indexurl)

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 5
0
    def post(self):
        """Handle a POST by running a lucky search for the submitted text.

        Crawls the urank sample site and computes ranks on every call, reads
        the 'text' request parameter, and renders either the query together
        with its search result or a fallback suggestion when the result is
        falsy.
        """
        seed_url = 'http://udacity.com/cs101x/urank/index.html'
        corpus, graph = crawl_web(seed_url)
        ranks = compute_ranks(graph)
        query = self.request.get('text')
        match = lucky_search(corpus, ranks, query)
        if match:
            self.render(text=query, links=match)
            return
        # Nothing found: clear the text field and point the user elsewhere.
        self.render(text="", links="try www.google.com")
Ejemplo n.º 6
0
    def post(self):
        """Render the lucky-search result for the posted 'text' parameter.

        The site is crawled and ranked on each call. When ``lucky_search``
        returns a falsy value, an empty text and a fallback suggestion are
        rendered instead of the query and its result.
        """
        corpus, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
        ranks = compute_ranks(graph)
        query = self.request.get('text')
        found = lucky_search(corpus, ranks, query)
        # Choose both render arguments together so they always stay paired.
        shown_text, shown_links = (query, found) if found else ("", "try www.google.com")
        self.render(text=shown_text, links=shown_links)
Ejemplo n.º 7
0
def test_engine():
    """Crawl the urank sample site and check the two search functions.

    Contains an exercise placeholder between the crawl and the assertions;
    ``fname`` is assigned for that placeholder and is not otherwise used in
    this function.

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    kathleen = "http://udacity.com/cs101x/urank/kathleen.html"
    nickel = "http://udacity.com/cs101x/urank/nickel.html"
    arsenic = "http://udacity.com/cs101x/urank/arsenic.html"
    hummus = "http://udacity.com/cs101x/urank/hummus.html"
    indexurl = "http://udacity.com/cs101x/urank/index.html"

    corpus = crawl_web("http://udacity.com/cs101x/urank/index.html")
    fname = "corpus.pkl"

    # YOUR CODE HERE

    assert lucky_search(corpus, "Hummus") == kathleen
    assert ordered_search(corpus, "Hummus") == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, "the") == nickel
    assert ordered_search(corpus, "the") == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(corpus, "babaganoush") is None
    assert ordered_search(corpus, "babaganoush") is None
    print("Finished tests.")
Ejemplo n.º 8
0
def test_engine():
    """Crawl the urank sample site, compute ranks, and check both searches.

    Asserts the ``lucky_search`` and ``ordered_search`` results for the
    keywords 'Hummus', 'the' and 'babaganoush' against the expected URLs
    (or ``None`` for the keyword with no expected match).

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks, 'the') == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 9
0
def test_engine():
    """Crawl the urank sample site, compute ranks, and check both searches.

    Asserts the ``lucky_search`` and ``ordered_search`` results for the
    keywords 'Hummus', 'the' and 'babaganoush' against the expected URLs
    (or ``None`` for the keyword with no expected match).

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
    ranks = compute_ranks(graph)
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    assert lucky_search(index, ranks, 'Hummus') == kathleen
    assert ordered_search(index, ranks, 'Hummus') == [
        kathleen, nickel, arsenic, hummus, indexurl
    ]
    assert lucky_search(index, ranks, 'the') == nickel
    assert ordered_search(index, ranks, 'the') == [
        nickel, arsenic, hummus, indexurl
    ]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(index, ranks, 'babaganoush') is None
    assert ordered_search(index, ranks, 'babaganoush') is None
    print("Finished tests.")
Ejemplo n.º 10
0
 def GET(self, query):
     """Return the ``lucky_search`` result for ``query``.

     The search uses the ``corpus`` and ``ranks`` names from the
     enclosing scope; the result is passed through unchanged.
     """
     return lucky_search(corpus, ranks, query)
Ejemplo n.º 11
0
def test_engine():
    """Crawl the urank sample site, pickle the corpus, and check the searches.

    The crawled corpus is written to 'corpus.pkl' with ``pickle.dump``. An
    ``IOError`` while writing is reported with a printed message and does not
    stop the assertions that follow.

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    corpus = crawl_web('http://udacity.com/cs101x/urank/index.html')
    fname = 'corpus.pkl'

    # Pickle data is a byte stream, so the file is opened in binary mode.
    try:
        with open(fname, 'wb') as fout:
            pickle.dump(corpus, fout)
    except IOError as e:
        print("Most odacious! Cannot write to corpus " + str(e))
    else:
        # Report success only after the file has been written and closed.
        print("Successfully wrote to " + fname)

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(
        corpus, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")


test_engine()
Ejemplo n.º 12
0
    def GET(self, query):
        """Return the ``lucky_search`` result for ``query``, or a fallback hint.

        The search uses the ``corpus`` and ``ranks`` names from the enclosing
        scope. When the search returns ``None``, the string
        "Try searchwithpeter.info" is returned instead.
        """
        result = lucky_search(corpus, ranks, query)
        if result is None:
            return "Try searchwithpeter.info"
        return result
Ejemplo n.º 13
0
 def GET(self, query):
     """Return the ``lucky_search`` result for ``query``, or a fallback hint.

     Any falsy search result is replaced by the fallback string.
     """
     return lucky_search(corpus, ranks, query) or "Try searchwithpeter.info."
Ejemplo n.º 14
0
Archivo: Main.py Proyecto: tar07/Python
from search import lucky_search, ordered_search
import pickle

def test_engine():
    """Crawl the urank sample site, pickle the corpus, and check the searches.

    The crawled corpus is written to 'corpus.pkl' with ``pickle.dump``. An
    ``IOError`` while writing is reported with a printed message and does not
    stop the assertions that follow.

    Raises:
        AssertionError: if any search result differs from the expectation.
    """
    print("Testing...")
    kathleen = 'http://udacity.com/cs101x/urank/kathleen.html'
    nickel = 'http://udacity.com/cs101x/urank/nickel.html'
    arsenic = 'http://udacity.com/cs101x/urank/arsenic.html'
    hummus = 'http://udacity.com/cs101x/urank/hummus.html'
    indexurl = 'http://udacity.com/cs101x/urank/index.html'

    # NOTE(review): crawl_web is not among the names imported in the visible
    # import lines of this file -- confirm where it is defined.
    corpus = crawl_web('http://udacity.com/cs101x/urank/index.html')
    fname = 'corpus.pkl'

    # Pickle data is a byte stream, so the file is opened in binary mode.
    try:
        with open(fname, 'wb') as fout:
            pickle.dump(corpus, fout)
    except IOError as e:
        print("Failed to write to file: " + str(e))
    else:
        # Report success only after the file has been written and closed.
        print("Pickled file to " + fname)

    assert lucky_search(corpus, 'Hummus') == kathleen
    assert ordered_search(corpus, 'Hummus') == [kathleen, nickel, arsenic, hummus, indexurl]
    assert lucky_search(corpus, 'the') == nickel
    assert ordered_search(corpus, 'the') == [nickel, arsenic, hummus, indexurl]
    # Compare to the None singleton by identity, not equality.
    assert lucky_search(corpus, 'babaganoush') is None
    assert ordered_search(corpus, 'babaganoush') is None
    print("Finished tests.")

test_engine()
Ejemplo n.º 15
0
from crawler import crawl_web
from search import urank, lucky_search, lookup
# Manual test: crawl from the seed page, compute ranks with urank, then run
# both lookups for one keyword typed by the user.
seed_page = 'www.dmoz.org'
index, graph = crawl_web(seed_page)
ranks = urank(graph)
keyword = raw_input('Enter search keyword: ')
lucky_result = lucky_search(index, ranks, keyword)
print(lucky_result)
lookup_result = lookup(index, keyword)
print(lookup_result)

#Stuff to add:
#    1. Support for multiple keywords
#    2. Integration with web page
#    3. Removal of useless keywords and punctuation
#    4. Add meaning to punctuation
#    5. Listing of all results according to their rank
#****6. A better page rank algorithm****
#
#-End-
Ejemplo n.º 16
0
#!/usr/bin/python -tt

from crawler import crawl_web, compute_ranks
from search import lucky_search, lookup_best

wc = crawl_web('localhost', 200)
ranks = compute_ranks(wc, 50)

print 'Enter 0 to exit.'
while True:
    user_input = raw_input("Enter: keyword any_number_for_lucky_search [e.g. php]: ")
    if user_input:
        input = user_input.split()
        word = input[0]
        option = 1 if len(input) == 1 else 2
        if word == '0':
            break
        if option == 1:
            results = lookup_best(word, wc, ranks)
            if results:
                for result in results[:25]:
                    print result, '\t'*4, ranks[result]
            else:
                print 'No results!'
        else:
            print lucky_search(word, wc, ranks) or 'Not lucky! -_-'
    print

print '\nThanks for using me!'
Ejemplo n.º 17
0
 def GET(self, query):
   """Return the ``lucky_search`` result for ``query`` against ``corpus``."""
   outcome = lucky_search(corpus, query)
   return outcome
Ejemplo n.º 18
0
 def GET(self, query):
     """Return the ``lucky_search`` result for ``query``, or a fallback hint.

     The search uses the ``corpus`` name from the enclosing scope. When the
     search returns ``None``, the string 'Try searchwithpeter.info' is
     returned instead.
     """
     res = lucky_search(corpus, query)
     # Compare to the None singleton by identity, not equality.
     if res is None:
         return 'Try searchwithpeter.info'
     return res
Ejemplo n.º 19
0
 def GET(self, query):
     """Look up ``query`` with ``lucky_search`` and return its result as-is.

     ``corpus`` and ``ranks`` are read from the enclosing scope.
     """
     return lucky_search(corpus, ranks, query)
Ejemplo n.º 20
0
 def GET(self, query):
     """Return the ``lucky_search`` result for ``query``, or a fallback hint.

     The search uses the ``corpus`` name from the enclosing scope. When the
     search returns ``None``, the string 'Try searchwithpeter.info' is
     returned instead.
     """
     res = lucky_search(corpus, query)
     # Compare to the None singleton by identity, not equality.
     if res is None:
         return 'Try searchwithpeter.info'
     return res