import data_load
import indexer
import WebParser
import FileSearcher
import WebSearcher

def webAndFile():
    # Refresh the traversal data, then run the file search and the
    # web search back to back.
    data_load.get_traversal_data()
    file_data = indexer.read_data()
    web_data = WebParser.webData()
    print("File data search:")
    print("====================================================")
    FileSearcher.fileSearch(file_data)
    print("Web data search:")
    print("====================================================")
    WebSearcher.webSearcher(web_data)

# ----------------------------------------------------------------------

from urllib.error import URLError
from data_load import get_traversal_data
import indexer
import searcher
import pickle

def visit_url(url, domain, returnList):
    try:
        ...  # crawl logic elided in the original source
    except URLError as e:
        print("error")
    return returnList

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed] = 0
returnList = []

file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

# catch exceptions dealing with pickling the objects, as well as catching
# any exceptions dealing with opening the file to begin with
try:
    with open(webPickle, "wb") as out:
        try:
            pickle.dump(web_data, out)
        except pickle.PicklingError:
            print("Unpicklable object passed into dump().")
except IOError as ioe:
    print("Unable to write to file: " + ioe.filename)
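
# ----------------------------------------------------------------------
# A minimal sketch of what the elided visit_url body above might have done:
# fetch a page, record its text, and recurse into same-domain links. The
# LinkCollector helper, the `seen` set, and the name visit_url_sketch are
# illustrative assumptions, not code from the original submission.

from html.parser import HTMLParser
from urllib.error import URLError
from urllib.parse import urljoin
from urllib.request import urlopen

class LinkCollector(HTMLParser):
    """Collects the href value of every anchor tag it sees."""
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            for name, value in attrs:
                if name == "href" and value:
                    self.links.append(value)

def visit_url_sketch(url, domain, returnList, seen=None):
    if seen is None:
        seen = set()
    seen.add(url)
    try:
        page = urlopen(url).read().decode("utf-8", errors="ignore")
        returnList.append((url, page))
        collector = LinkCollector()
        collector.feed(page)
        for link in collector.links:
            absolute = urljoin(url, link)   # resolve relative links
            if domain in absolute and absolute not in seen:
                visit_url_sketch(absolute, domain, returnList, seen)
    except URLError:
        print("error")
    return returnList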

# ----------------------------------------------------------------------

import searcher
import data_load
import indexer

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle", "fortunes_shelve")
searcher.search("fortunes_shelve")

# ----------------------------------------------------------------------

from urllib.error import URLError
from data_load import get_traversal_data
import indexer
import searcher
import pickle

def visit_url(url, domain, returnList):
    try:
        ...  # crawl logic elided in the original source
    except URLError as e:
        print("error")
    return returnList

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed] = 0
returnList = []

file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

# dump the crawled web data and the traversed file data to disk
out = open(webPickle, "wb")
pickle.dump(web_data, out)
out.close()

out = open(dataPickle, "wb")
pickle.dump(file_data, out)
out.close()

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")

# ----------------------------------------------------------------------

import data_load
import indexer
import searcher

def searchForContent():
    # Refresh the traversal data, load it back, and hand it to the searcher.
    data_load.get_traversal_data()
    search_data = indexer.read_data()
    searcher.search(search_data)

# ----------------------------------------------------------------------

#JAY GAUVIN
#HOMEWORK 4
# this is the main program

import searcher
import data_load
import indexer

traverse = input("Refresh/Traverse Data Files? (y/n): ")
if traverse == "y":
    data_load.get_traversal_data()  # creates raw_data.pickle; only needs to run once

dic = indexer.process_datafile("raw_data.pickle", "fortunes_shelve")
#dic = indexer.process_data(data_load.data_list)  # the original process method
searcher.search("fortunes_shelve")

# ----------------------------------------------------------------------

#Brandon Marshall
#Python Scripting
#October 1, 2015
#Homework 4 - File Traverser

import data_load
import indexer
import searcher

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle", "fortunes_shelve", "indexed_files")
searcher.search("fortunes_shelve", "indexed_files")
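
# ----------------------------------------------------------------------
# Every variant above runs the same pickle -> shelve -> search pipeline. A
# minimal sketch of that flow, assuming raw_data.pickle holds a list of
# (filename, text) pairs; the record layout and both function names are
# illustrative assumptions, not any submission's actual indexer/searcher API.

import pickle
import shelve

def build_index_sketch(pickle_name, shelve_name):
    """Builds a word -> set-of-filenames inverted index in a shelve."""
    with open(pickle_name, "rb") as f:
        records = pickle.load(f)            # assumed: [(filename, text), ...]
    with shelve.open(shelve_name) as index:
        for filename, text in records:
            for word in text.lower().split():
                hits = index.get(word, set())
                hits.add(filename)
                index[word] = hits          # reassign so the shelve persists it

def search_sketch(shelve_name):
    """Prompts for a term and prints the files it appears in."""
    with shelve.open(shelve_name) as index:
        while True:
            term = input("Search term (blank to quit): ").strip().lower()
            if not term:
                break
            print(sorted(index.get(term, set())) or "no matches")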