def BFS(src, dest=None, wiki_dict=None):
    """Breadth-first search over the article link graph starting at ``src``.

    Parameters:
        src: title of the starting article; lower-cased before use.
        dest: optional title of the article to look for; lower-cased
            before use when it is truthy.
        wiki_dict: mapping of article title -> iterable of linked titles.
            When ``None`` it is loaded with ``analytics.load_links()``.

    Returns:
        * the result of ``the_path(dest, prev)`` as soon as ``dest`` is
          dequeued (``the_path`` is defined outside this block; presumably
          it rebuilds the path from the predecessor map);
        * the string ``"Article not present in directory"`` when a dequeued
          article is not a key of ``wiki_dict``;
        * otherwise, once every reachable article has been explored, the
          predecessor dict mapping each reached article to the article it
          was discovered from (``src`` maps to ``None``).
    """
    # Function-scope import keeps the block self-contained.
    from collections import deque

    src = src.lower()
    if dest:
        dest = dest.lower()

    # Only load from disk when no mapping was supplied at all; an explicitly
    # passed empty mapping is searched as-is.
    if wiki_dict is None:
        wiki_dict = analytics.load_links()

    # Search state is local to each call.  It used to live in module globals
    # that were only reset on the fall-through path, so any early return
    # leaked visited/predecessor entries into the next search.
    prev = {src: None}  # the start page has no predecessor
    visited = set([src])
    queue = deque([src])  # deque gives O(1) pops from the left

    while queue:
        current = queue.popleft()

        # Found the target: hand the predecessor map to the path builder.
        if current == dest:
            return the_path(current, prev)

        try:
            links = wiki_dict[current]
        except KeyError:
            return "Article not present in directory"

        # Record the parent of every newly discovered page and mark it as
        # visited when it is enqueued, so each page is queued at most once.
        for page in links:
            if page not in visited:
                prev[page] = current
                visited.add(page)
                queue.append(page)

    # Queue exhausted without meeting dest: return the whole predecessor map.
    return prev
#!/usr/bin/env python
# Command-line front end: prints the shortest link path between two articles.
#
# Usage: <script> <source article> <destination article> [-analytics]
import sys

import analytics
import bfs

# Exit with a usage message instead of an IndexError when the two article
# titles are not both supplied.
if len(sys.argv) < 3:
    sys.stderr.write(
        "usage: {0} <source article> <destination article> [-analytics]\n".format(sys.argv[0])
    )
    sys.exit(1)

# Lower-cased titles are used for every lookup below.
source = sys.argv[1].lower()
destination = sys.argv[2].lower()

# Load the link dictionary once and share it with every function below.
wiki_dict = analytics.load_links()

# Run BFS between the two articles.
shortest_path = bfs.BFS(source, destination, wiki_dict)

if isinstance(shortest_path, dict):
    # BFS returned its predecessor map: the destination was never reached.
    print("The two articles are in no way connected")
elif shortest_path == "Article not present in directory":
    # BFS reports a missing article with this sentinel string; print it
    # as-is rather than formatting it as if it were a path.
    print(shortest_path)
elif len(shortest_path) > 2:
    # The destination is more than one link away: show the path.
    print("The shortest path between {0} and {1} is following the following links: \n {2}".format(source, destination, shortest_path))
else:
    print("{0} is a direct link from {1}".format(sys.argv[2], sys.argv[1]))

# Optional analytics, enabled by a trailing "-analytics" argument.
if len(sys.argv) == 4 and sys.argv[3] == "-analytics":
    print("\n")

    # The prompts only pause for Enter; the typed answer is not inspected.
    raw_input("List pages that reference {0}? ".format(sys.argv[1]))
    analytics.where_referenced(source, wiki_dict)

    raw_input("List pages that reference {0}? ".format(sys.argv[2]))
    analytics.where_referenced(destination, wiki_dict)