def bfsSearchMovie(url):
    """Crawl movies breadth-first, starting from a search results page.

    The queue is seeded with every link returned by ``parseSearchPage(url)``
    (prefixed with ``HOST_URL``) at distance 0.  Each queued entry is passed
    to ``getMovieInfo``; for every cast link in the returned info, the movies
    listed by ``parseCastPage`` are appended to the queue at distance + 1.

    Entries for which ``getMovieInfo`` or ``parseCastPage`` returns ``None``
    are skipped.

    NOTE(review): this function does no de-duplication and has no depth
    limit of its own, so termination relies on the helpers eventually
    returning ``None`` / empty results -- confirm against their behaviour.

    Args:
        url: Address of the search page used to seed the crawl.
    """
    # The list doubles as the visit log: entries are never popped, ``head``
    # just advances over them, so the full crawl order is kept for printing.
    href_queue = [
        {'href': HOST_URL + movie_link, 'dist': 0}
        for movie_link in parseSearchPage(url)
    ]
    head = 0

    # Strictly less-than: the previous ``<=`` read one slot past the end of
    # the queue and raised IndexError as soon as the queue was drained.
    while head < len(href_queue):
        cur = href_queue[head]
        info = getMovieInfo(cur['href'], cur['dist'])
        head += 1
        if info is None:
            continue

        for cast_href in info['cast_href']:
            movies = parseCastPage(cast_href)
            if movies is None:
                continue
            for movie in movies['movie_href']:
                href_queue.append({'href': movie, 'dist': cur['dist'] + 1})

    # NOTE(review): assumed to be a one-off dump after the crawl finishes;
    # the original layout was lost, so confirm the intended placement.
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(href_queue)
def dfsSearchCast(info, dist, max_depth=10):
    """Crawl movies depth-first through the cast of an already-fetched movie.

    For every cast link in ``info['cast_href']`` the cast page is parsed with
    ``parseCastPage``; each movie it lists is fetched with ``getMovieInfo``
    at ``dist + 1`` and then searched recursively.

    Args:
        info: Movie info mapping; only its ``'cast_href'`` entry is read here.
        dist: Depth of ``info`` relative to the start of the search.
        max_depth: Depth at which the search stops descending.  Defaults to
            10, the limit that used to be hard-coded.

    Returns:
        None.  Any useful effect comes from the helper calls.
    """
    # Depth guard comes first so nothing is fetched once the limit is hit.
    if dist >= max_depth:
        return

    next_dist = dist + 1
    # Go through all the cast members of this movie.
    for cast_href in info['cast_href']:
        movies = parseCastPage(cast_href)
        if movies is None:
            continue

        # Search each movie this cast member appears in.
        for movie in movies['movie_href']:
            new_info = getMovieInfo(movie, next_dist)
            # Not a movie page.
            if new_info is None:
                continue
            # Keep descending with the same depth limit.
            dfsSearchCast(new_info, next_dist, max_depth)