def submit(self):
    """Call ``self.browser.submit()`` and parse the reply as HTML.

    Nothing is returned; the results are stored on the instance:

    * ``self.response`` -- the object returned by ``self.browser.submit()``
    * ``self.text``     -- the response body passed through ``self.Decode``
    * ``self.parsed``   -- ``htmldom`` applied to the re-encoded text

    ``self.handleHtmlErrors()`` is invoked once parsing has finished.
    """
    self.trace()

    # send the request and keep the raw response around
    reply = self.browser.submit()
    self.response = reply

    # decode the body
    body = self.Decode(reply.read())
    self.text = body

    # the parser is fed the encoded form of the text, not the decoded one
    self.parsed = htmldom(self.Encode(body))
    self.handleHtmlErrors()
def submit(self):
    """Call ``self.browser.submit()`` and parse the reply as HTML.

    Nothing is returned; the results are stored on the instance:

    * ``self.response`` -- the object returned by ``self.browser.submit()``
    * ``self.text``     -- the response body passed through ``self.Decode``
    * ``self.parsed``   -- ``htmldom`` applied to the re-encoded text

    ``self.handleHtmlErrors()`` is invoked once parsing has finished.
    """
    self.trace()

    # send the request and keep the raw response around
    reply = self.browser.submit()
    self.response = reply

    # decode the body
    body = self.Decode(reply.read())
    self.text = body

    # the parser is fed the encoded form of the text, not the decoded one
    self.parsed = htmldom(self.Encode(body))
    self.handleHtmlErrors()
def load(self, url=None, title=None, parameters=None, GET=False, visit=False,
         parse_as=None, unicodify=True, censor_url=False, catch_errors=True):
    """Fetch a page through ``self.browser`` and optionally parse its body.

    Nothing is returned; the results are stored on the instance:
    ``self.response`` (response object), ``self.text`` (body),
    ``self.parsed`` (parsed document, or None when ``parse_as`` is falsy)
    and ``self.last_url`` (the request description that is also passed to
    ``self.traceMessage``).

    Args:
        url: Address to request. When falsy it is built as
            ``self.url_base + self.path_article + title``.
        title: Page title, used only when ``url`` is not given.
        parameters: Request parameters. When truthy they are passed through
            ``self.UrlEncode`` and handed to the browser as request data,
            unless ``GET`` is set.
        GET: Append the encoded parameters to the URL's query string
            instead of sending them as request data.
        visit: Use ``self.browser.open`` and install the patched response
            with ``set_response``; otherwise use ``open_novisit``.
        parse_as: ``'xml'``, ``'html'`` or ``'json'`` (case-insensitive);
            falsy to skip parsing.
        unicodify: Pass the body through ``self.Decode``.
        censor_url: Trace ``'<<hidden>>'`` in place of the parameters and
            store ``'<<hidden>>'`` in ``self.last_url``.
        catch_errors: After XML/HTML parsing, run ``self.handleApiErrors()``
            or ``self.handleHtmlErrors()`` respectively.

    Raises:
        self.Error: If neither ``url`` nor ``title`` is given, or if
            ``parse_as`` names an unrecognized format (raised only after
            the page has been fetched).
    """
    if censor_url:
        self.trace(overrides={'parameters': '<<hidden>>'})
    else:
        self.trace()

    # get URL & query string
    if not url:
        if not title:
            # call form of raise: valid on both Python 2 and Python 3
            raise self.Error('Browser::load called with no URL or page title to load')
        url = self.url_base + self.path_article + title
    if parameters:
        parameters = self.UrlEncode(parameters)

    # force GET mode?
    if GET and parameters:
        # extend an existing query string, or start a new one
        separator = '&' if '?' in url else '?'
        url = '%s%s%s' % (url, separator, parameters)
        parameters = None
        self.last_url = url
    else:
        self.last_url = u'%s << %s' % (url, parameters)
    if censor_url:
        self.last_url = '<<hidden>>'
    self.traceMessage(self.last_url)

    # load page
    if visit:
        self.response = self.browser.open(url, parameters)
        # bugfix -- mechanize confused by <br/>
        self.response.set_data(
            self.response.get_data().replace("<br/>", "<br />"))
        self.browser.set_response(self.response)
    else:
        self.response = self.browser.open_novisit(url, parameters)

    # fetch page text
    self.text = self.response.read()
    if unicodify:
        self.text = self.Decode(self.text)

    # parse page text
    if not parse_as:
        self.parsed = None
        return
    parse_as = parse_as.lower()

    if parse_as == 'xml':
        self.parsed = xmldom.parseString(self.Encode(self.text))
        if catch_errors:
            self.handleApiErrors()
    elif parse_as == 'html':
        self.parsed = htmldom(self.Encode(self.text))
        if catch_errors:
            self.handleHtmlErrors()
    elif parse_as == 'json':
        self.parsed = simplejson.loads(self.Encode(self.text))
    else:
        raise self.Error(
            'Browser::load cannot parse text as "%s", unrecognized format' % parse_as)
def load(self, url=None, title=None, parameters=None, GET=False, visit=False,
         parse_as=None, unicodify=True, censor_url=False, catch_errors=True):
    """Fetch a page through ``self.browser`` and optionally parse its body.

    Nothing is returned; the results are stored on the instance:
    ``self.response`` (response object), ``self.text`` (body),
    ``self.parsed`` (parsed document, or None when ``parse_as`` is falsy)
    and ``self.last_url`` (the request description that is also passed to
    ``self.traceMessage``).

    Args:
        url: Address to request. When falsy it is built as
            ``self.url_base + self.path_article + title``.
        title: Page title, used only when ``url`` is not given.
        parameters: Request parameters. When truthy they are passed through
            ``self.UrlEncode`` and handed to the browser as request data,
            unless ``GET`` is set.
        GET: Append the encoded parameters to the URL's query string
            instead of sending them as request data.
        visit: Use ``self.browser.open`` and install the patched response
            with ``set_response``; otherwise use ``open_novisit``.
        parse_as: ``'xml'``, ``'html'`` or ``'json'`` (case-insensitive);
            falsy to skip parsing.
        unicodify: Pass the body through ``self.Decode``.
        censor_url: Trace ``'<<hidden>>'`` in place of the parameters and
            store ``'<<hidden>>'`` in ``self.last_url``.
        catch_errors: After XML/HTML parsing, run ``self.handleApiErrors()``
            or ``self.handleHtmlErrors()`` respectively.

    Raises:
        self.Error: If neither ``url`` nor ``title`` is given, or if
            ``parse_as`` names an unrecognized format (raised only after
            the page has been fetched).
    """
    if censor_url:
        self.trace(overrides={'parameters': '<<hidden>>'})
    else:
        self.trace()

    # get URL & query string
    if not url:
        if not title:
            # call form of raise: valid on both Python 2 and Python 3
            raise self.Error('Browser::load called with no URL or page title to load')
        url = self.url_base + self.path_article + title
    if parameters:
        parameters = self.UrlEncode(parameters)

    # force GET mode?
    if GET and parameters:
        # extend an existing query string, or start a new one
        separator = '&' if '?' in url else '?'
        url = '%s%s%s' % (url, separator, parameters)
        parameters = None
        self.last_url = url
    else:
        self.last_url = u'%s << %s' % (url, parameters)
    if censor_url:
        self.last_url = '<<hidden>>'
    self.traceMessage(self.last_url)

    # load page
    if visit:
        self.response = self.browser.open(url, parameters)
        # bugfix -- mechanize confused by <br/>
        self.response.set_data(
            self.response.get_data().replace("<br/>", "<br />"))
        self.browser.set_response(self.response)
    else:
        self.response = self.browser.open_novisit(url, parameters)

    # fetch page text
    self.text = self.response.read()
    if unicodify:
        self.text = self.Decode(self.text)

    # parse page text
    if not parse_as:
        self.parsed = None
        return
    parse_as = parse_as.lower()

    if parse_as == 'xml':
        self.parsed = xmldom.parseString(self.Encode(self.text))
        if catch_errors:
            self.handleApiErrors()
    elif parse_as == 'html':
        self.parsed = htmldom(self.Encode(self.text))
        if catch_errors:
            self.handleHtmlErrors()
    elif parse_as == 'json':
        self.parsed = simplejson.loads(self.Encode(self.text))
    else:
        raise self.Error(
            'Browser::load cannot parse text as "%s", unrecognized format' % parse_as)