def get_all_degree_links(degree, start_degree_pages, end_link, path, reverse_path_last_degree):
    current_degree_pages = set()
    # if no more degrees to go, return all links of the current degree
    if degree == 0:
        return start_degree_pages
    # for each page in the current degree, get all URLs
    for page in start_degree_pages:
        all_pages = web_scraper.getAllUrl(page)
        # remove all duplicate pages that we already checked
        for degrees in path:
            all_pages.difference_update(degrees)
        current_degree_pages = current_degree_pages.union(all_pages)
        # if the end link was found, return
        if end_link in all_pages:
            print("Found the Page in get_all_degree_links")
            return current_degree_pages.union(all_pages)
        # check for overlap with the last degree of the reverse search
        overlap = all_pages & reverse_path_last_degree
        if len(overlap) > 0:
            print("Found an Overlap")
            print(overlap)
            return current_degree_pages.union(all_pages)
    return get_all_degree_links(degree - 1, current_degree_pages, end_link, path, reverse_path_last_degree)
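# A hypothetical driver for the function above (not part of the original code). It assumes
# the repo's web_scraper module is importable and that getAllUrl returns a set of
# "/wiki/..." paths. The names forward_frontier, reverse_frontier and visited_by_degree
# are illustrative only.
import web_scraper

if __name__ == "__main__":
    forward_frontier = {"/wiki/Germany"}
    visited_by_degree = [forward_frontier]
    # Links on the end page act as the last degree of a reverse search.
    reverse_frontier = web_scraper.getAllUrl("/wiki/Shanghai")
    reachable = get_all_degree_links(2, forward_frontier, "/wiki/Shanghai",
                                     visited_by_degree, reverse_frontier)
    print(len(reachable), "pages collected before a hit or an overlap")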
def get_all_degree_links(degree, start_degree_pages):
    current_degree_pages = []
    if degree == 0:
        return start_degree_pages
    for page in start_degree_pages:
        current_degree_pages += web_scraper.getAllUrl(page)
    return get_all_degree_links(degree - 1, current_degree_pages)
def get_all_degree_links(degree, start_degree_pages, end_link):
    current_degree_pages = []
    # if no more degrees to go, return all links of the current degree
    if degree == 0:
        return start_degree_pages
    # for each page in the current degree, get all URLs
    for page in start_degree_pages:
        all_pages = web_scraper.getAllUrl(page)
        # if the end link was found, return
        if end_link in all_pages:
            return list(current_degree_pages) + list(all_pages)
        current_degree_pages += all_pages
    # do the same for the next degree
    return get_all_degree_links(degree - 1, current_degree_pages, end_link)
def is_link_in_page(page_to_check, end_link):
    links_on_page = web_scraper.getAllUrl(page_to_check)
    if end_link in links_on_page:
        return True
    else:
        return False
# Goes through the whole graph, finds all the paths and then filters them down to the shortest path
import web_scraper

start_link = "/wiki/Germany"
end_link = "/wiki/Shanghai"

graph = {'A': ['B', 'C'],
         'B': ['C', 'D'],
         'C': ['D'],
         'D': ['C'],
         'E': ['F'],
         'F': ['C']}

web_scraper.getAllUrl(start_link)

def find_shortest_path(graph, start, end, path=[]):
    path = path + [start]
    if start == end:
        return path
    # if start not in graph:
    #     return None
    shortest = None
    for node in graph[start]:
        if node not in path:
            newpath = find_shortest_path(graph, node, end, path)
            if newpath:
                if not shortest or len(newpath) < len(shortest):
                    shortest = newpath
    return shortest
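# Quick sanity check on the toy graph above: of the candidate paths A-B-C-D, A-B-D and
# A-C-D, the function keeps the first three-node path it finds.
print(find_shortest_path(graph, 'A', 'D'))   # ['A', 'B', 'D']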
import sys
import web_scraper

def is_link_in_page(page_to_check, end_link):
    links_on_page = web_scraper.getAllUrl(page_to_check)
    if end_link in links_on_page:
        return True
    else:
        return False

start_link = "/wiki/Flour"
link = start_link
end_link = "/wiki/Flour"
path = [[start_link]]
links_on_start_page = web_scraper.getAllUrl(start_link)

def get_all_degree_links(degree, start_degree_pages):
    current_degree_pages = []
    if degree == 0:
        return start_degree_pages
    for page in start_degree_pages:
        current_degree_pages += web_scraper.getAllUrl(page)
    return get_all_degree_links(degree - 1, current_degree_pages)

# Expand one more degree from the start page on each pass and stop once the end link shows up.
degree_count = 0
while True:
    path.append(get_all_degree_links(degree_count, [start_link]))
    if end_link in path[degree_count]:
        break
    degree_count += 1
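# web_scraper.getAllUrl is used throughout this repo but its source is not shown here.
# Below is a minimal sketch of what such a function might look like, assuming it fetches
# the Wikipedia article at the given "/wiki/..." path and returns the set of internal
# article links on that page; the real module may differ.
import requests
from bs4 import BeautifulSoup

def getAllUrl(page):
    # Fetch the article and collect hrefs that point at other articles.
    response = requests.get("https://en.wikipedia.org" + page)
    soup = BeautifulSoup(response.text, "html.parser")
    links = set()
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        # Keep only plain article links, skipping namespaces such as "File:" or "Help:".
        if href.startswith("/wiki/") and ":" not in href:
            links.add(href)
    return links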