def __init__(self):
    """Configure logging, load runtime settings and create the HTML parser.

    Both ``logging.ini`` and ``configuration.ini`` are passed as bare
    relative file names.
    """
    # Logging must be configured from file before the named logger is used.
    logging.config.fileConfig("logging.ini")
    self.__logger = logging.getLogger('jsonpost')

    # Runtime tuning values live in the [Runtime] section of the config file.
    settings = configuration.configuration()
    settings.fileConfig("configuration.ini")

    # int() raises here if a configured value is not integer-like.
    retry_times = settings.getValue("Runtime", "retry_times")
    self.__RETRY_TIMES__ = int(retry_times)
    page_interval = settings.getValue("Runtime", "page_interval")
    self.__PAGE_INTERVAL__ = int(page_interval)

    # Parser helper kept on the instance for later use.
    self.__h = htmlparser.htmlpaser()
def __init__(self, baseurl):
    """Create the collaborators this object works with.

    Args:
        baseurl: Forwarded to ``htmldownloader`` as its ``baseurl`` keyword.
    """
    # Collaborating components, one instance of each per object.
    self.urlmanager = urlmanager()
    self.htmldownloader = htmldownloader(baseurl=baseurl)
    self.htmlparser = htmlpaser()
    self.dataoutput = dataoutput()

    # NOTE(review): ``max_iter_time`` and ``sleeptime`` are not parameters of
    # this method; they are resolved from an enclosing scope when the
    # constructor runs. Presumably they are module-level settings -- confirm,
    # since a NameError is raised here if they are not defined.
    self.max_iter_time = max_iter_time
    self.sleeptime = sleeptime
def __init__(self):
    """Configure logging, load settings and build the helper objects.

    Creates one instance each of ``formpage``, ``htmlpaser``, ``jsonpost``
    and ``jspost`` and starts with an empty task id.
    """
    # Logging must be configured from file before the named logger is used.
    logging.config.fileConfig("logging.ini")
    self.__logger = logging.getLogger('grab')

    # The configuration object is kept on the instance, not just read once.
    settings = configuration.configuration()
    self.__c = settings
    settings.fileConfig("configuration.ini")

    # int() raises here if the configured value is not integer-like.
    retry_times = settings.getValue("Runtime", "retry_times")
    self.__RETRY_TIMES__ = int(retry_times)

    # Helper objects, created in this fixed order.
    self.__f = formpage.formpage()
    self.__h = htmlparser.htmlpaser()
    self.__j = jsonpost.jsonpost()
    self.__js = jspost.jspost()

    # No task has been assigned yet.
    self.__taskID = ""
def __init__(self):
    """Create two identically configured browsers and load runtime settings.

    Two ``mechanize.Browser`` instances are built (``__br`` and ``__br_i``).
    They share a single cookie jar and receive exactly the same handler
    options and User-Agent header. Logging is then configured from
    ``logging.ini`` and the retry count and page interval are read from the
    ``[Runtime]`` section of ``configuration.ini``.
    """
    # Defined once so both browsers are guaranteed to send the same header.
    user_agent = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
                  'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')

    self.__br = mechanize.Browser()
    self.__br_i = mechanize.Browser()

    # One cookie jar instance is handed to both browsers.
    cj = cookielib.LWPCookieJar()

    # Configure both browsers in a single place so their options cannot
    # drift apart.
    for browser in (self.__br, self.__br_i):
        browser.set_cookiejar(cj)

        # Handler options.
        browser.set_handle_equiv(True)
        browser.set_handle_gzip(True)
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)

        # Follow zero-delay refreshes, but do not hang on refresh > 0.
        browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(),
                                   max_time=1)

        # For troubleshooting, set_debug_http / set_debug_redirects /
        # set_debug_responses can be switched on here.

        # Present a desktop Firefox User-Agent; each browser gets its own
        # header list.
        browser.addheaders = [('User-agent', user_agent)]

    # Logging must be configured from file before the named logger is used.
    logging.config.fileConfig("logging.ini")
    self.__logger = logging.getLogger('formpage')

    # int() raises here if a configured value is not integer-like.
    c = configuration.configuration()
    c.fileConfig("configuration.ini")
    self.__RETRY_TIMES__ = int(c.getValue("Runtime", "retry_times"))
    self.__PAGE_INTERVAL__ = int(c.getValue("Runtime", "page_interval"))

    # Parser helper kept on the instance for later use.
    self.__h = htmlparser.htmlpaser()