Example #1
import data_load
import indexer
import WebParser
import FileSearcher
import WebSearcher

def webAndFile():
    # Refresh the raw traversal data, then load both the file and web sources.
    data_load.get_traversal_data()
    file_data = indexer.read_data()
    web_data = WebParser.webData()
    print("File data search:")
    print("====================================================")
    FileSearcher.fileSearch(file_data)
    print("Web data search:")
    print("====================================================")
    WebSearcher.webSearcher(web_data)
Example #2
# (This snippet begins mid-function: the opening of visit_url, including
# its try: block, was truncated in the original example.)
    except URLError as e:
        print("error:", e)

    return returnList

from urllib.error import URLError  # needed by the except clause above
from data_load import get_traversal_data
import indexer
import searcher
import pickle

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed] = 0
returnList = []
file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

# Catch exceptions raised while pickling the object, as well as any
# exceptions raised when opening the file in the first place.
try:
    with open(webPickle, "wb") as out:
        try:
            pickle.dump(web_data, out)
        except pickle.PicklingError:
            print("Unpicklable object passed into dump().")
except IOError as ioe:
    print("Unable to write to file: " + ioe.filename)
Example #3
import searcher
import data_load
import indexer

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle","fortunes_shelve")
searcher.search("fortunes_shelve")
Example #4
# (As in Example #2, this snippet begins mid-function: the opening of
# visit_url, including its try: block, was truncated.)
    except URLError as e:
        print("error:", e)

    return returnList

from urllib.error import URLError  # needed by the except clause above
from data_load import get_traversal_data
import indexer
import searcher
import pickle

crawler_backlog = {}
seed = "http://www.newhaven.edu/"
crawler_backlog[seed] = 0
returnList = []
file_data = get_traversal_data()
web_data = visit_url(seed, "www.newhaven.edu", returnList)

webPickle = "raw_web.pickle"
dataPickle = "raw_data.pickle"

with open(webPickle, "wb") as out:
    pickle.dump(web_data, out)

with open(dataPickle, "wb") as out:
    pickle.dump(file_data, out)

indexer.process_data("unh_shelve", "indexed_files", dataPickle, webPickle)
searcher.search("unh_shelve", "indexed_files")
Example #5
import data_load
import indexer
import searcher

def searchForContent():  # refresh, load, and search the traversal data
    data_load.get_traversal_data()
    search_data = indexer.read_data()
    searcher.search(search_data)
Example #6
#JAY GAUVIN
#HOMEWORK 4

# this is the main program

import searcher
import data_load
import indexer

traverse = input("Refresh/Traverse Data Files? (y/n): ")
if traverse == "y":
    data_load.get_traversal_data()  # creates raw_data.pickle; only needs to run once
    dic = indexer.process_datafile("raw_data.pickle", "fortunes_shelve")
#dic = indexer.process_data(data_load.data_list)  # the original process method
searcher.search("fortunes_shelve")
Example #7
#Brandon Marshall
#Python Scripting
#October 1, 2015
#Homework 4 - File Traverser

import data_load
import indexer
import searcher

data_load.get_traversal_data()
indexer.process_data("raw_data.pickle", "fortunes_shelve", "indexed_files")
searcher.search("fortunes_shelve", "indexed_files")