def __init__(self, siteName):
    """Initialise empty per-site state and build the site's file paths.

    siteName is stored unchanged in self.siteName. The three file paths
    are built by concatenating a configured folder value, the
    right-stripped site name and a fixed extension.
    """
    self.siteName = siteName

    # Empty containers.
    self.rdList = Set()
    self.recordIndicator = Set()
    self.dic = {}
    self.dicCacheCount = {}
    self.dicTotalCount = {}

    # Local time captured once, when the object is constructed.
    self.localtime = time.localtime()

    # Overall counters, all starting at zero.
    self.httpsCount = self.httpsCached = 0
    self.cacheableObjectCount = self.cacheableObjectsThatAreCached = 0
    self.responseFileString = ""

    # One group of four zeroed counters for each of:
    # image, text, script, video, icon, app.
    self.imageCount = self.imageCached = self.imageHttps = self.imageHttpsCached = 0
    self.textCount = self.textCached = self.textHttps = self.textHttpsCached = 0
    self.scriptCount = self.scriptCached = self.scriptHttps = self.scriptHttpsCached = 0
    self.videoCount = self.videoCached = self.videoHttps = self.videoHttpsCached = 0
    self.iconCount = self.iconCached = self.iconHttps = self.iconHttpsCached = 0
    self.appCount = self.appCached = self.appHttps = self.appHttpsCached = 0

    self.sizeDetailSet = Set()

    # Paths are plain string concatenations: <folder><stripped name><ext>.
    self.requestFileRaw = (
        Utils.getConfig("requestFolder") + siteName.rstrip() + ".request_raw"
    )
    self.requestFile = (
        Utils.getConfig("requestFolder") + siteName.rstrip() + ".request"
    )
    self.responseFile = (
        Utils.getConfig("responseFolder") + siteName.rstrip() + ".response"
    )
def parse_options():
    """Parse the command line and return the ``(opts, args)`` pair.

    Recognised options:
      -f, --file       stored as ``opts.file``; the default is the value
                       of Utils.getConfig("defaultFile").
      -s, --site-name  stored as ``opts.sitename``; the default is "".
    """
    option_parser = optparse.OptionParser(usage=USAGE, version=VERSION)

    option_parser.add_option(
        "-f", "--file",
        dest="file",
        action="store",
        default=Utils.getConfig("defaultFile"),
        help="Read the site name from external file",
    )
    option_parser.add_option(
        "-s", "--site-name",
        dest="sitename",
        action="store",
        default="",
        help="Get links for specified url only",
    )

    # parse_args() already yields the (options, positional args) tuple.
    return option_parser.parse_args()
from Utils import Utils import threading,Queue,time,sys,traceback from time import sleep from sets import Set import optparse from ResponseGenerator import * from RequestGenerator import * from ResourceDetails import * from LinkSetGenerator import Crawler maximumSites = int(Utils.getConfig("maximumSites")) threadLimiter = threading.BoundedSemaphore(maximumSites) #Globals (start with a captial letter) Qin = Queue.Queue() Qout = Queue.Queue() Qerr = Queue.Queue() Pool = [] def err_msg(): trace= sys.exc_info()[2] try: exc_value=str(sys.exc_value) except: exc_value='' return str(traceback.format_tb(trace)),str(sys.exc_type),exc_value def get_errors(): try:
def __init__(self, profile=Utils.getConfig("profile", "firefox")):
    """Create the Firefox profile object held in self.fp.

    profile is passed unchanged to webdriver.FirefoxProfile. Its default
    is the value Utils.getConfig("profile", "firefox") returned when this
    ``def`` statement was executed, not when the constructor is called.
    """
    #1. Add HTTP-Request Logger extension to Firefox
    #2. Set Firefox profile so that HTTP-Request Logger will be available
    # NOTE(review): no extension is installed in the code visible here;
    # presumably the given profile already carries it, or it is added
    # elsewhere -- confirm.
    self.fp = webdriver.FirefoxProfile(profile)