def unknown_starttag(self, tag, attrs):
    """Handle an opening tag: build its DOM node and push it on the DOM stack.

    Steps, in order:
      * while ``self.in_Script`` is set, the raw start-tag text is passed to
        ``handle_data`` and nothing else happens;
      * if ``self.endearly`` is set, the tag is ignored;
      * otherwise a ``DOMObject`` is created, its attributes are set, event
        attributes are passed to ``emulate_timeout``, the tag's offsets in
        ``self.html`` are recorded, and the node is appended to the element
        on top of ``self.DOM_stack`` and then pushed onto that stack.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed; confirm that the ``script``,
    ``retrieval_all`` and offset sections are meant to be top-level steps.

    Args:
        tag: Tag name; it is compared against lowercase literals such as
            'script', 'div' and 'body'.
        attrs: Iterable of ``(name, value)`` attribute pairs.

    Returns:
        None.
    """
    # Markup encountered inside a script is handed on as plain data.
    if self.in_Script:
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    domobj = DOMObject(self.__dict__['__window'], tag, self)

    # Sometimes the attribute key is not the real attribute name, so it is
    # translated first. This is the IE way; for Firefox the translation is
    # done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''

    if config.retrieval_all and 'src' in domobj.__dict__:
        src = self.__dict__['__window'].document.location.fix_url(
            domobj.src)
        # The fetched values are not used further in this method; the call
        # is kept for whatever effect hc.get has (presumably retrieval or
        # logging of the resource -- confirm against hc).
        script, headers = hc.get(
            src, self.__dict__['__window'].document.location.href)

    # Best effort: record where the element's content starts and ends in the
    # raw HTML. A failed lookup (e.g. no closing tag) leaves whatever was
    # already recorded and carries on, exactly as before.
    try:
        # Lowercase once and search from an offset instead of lowercasing
        # and slicing the whole document for every lookup.
        lowered = self.html.lower()
        tag_start = lowered.index('<' + tag, self.current)
        self.current = lowered.index('>', tag_start) + 1

        domobj.__dict__['begin'] = self.current
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[
                domobj.__dict__['begin']:domobj.__dict__['end']]
    except Exception:
        # Deliberately ignored; unlike a bare ``except`` this still lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        self.__dict__['__window'].__dict__['__fl'].append(domobj)

    if tag in ('br', 'meta'):
        self.unknown_endtag(tag)  # <br> and <meta> have no end tag.

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        # An <option> with no usable preceding <select> is skipped silently.
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            pass
def unknown_starttag(self, tag, attrs):
    """Handle an opening tag: build its DOM node and push it on the DOM stack.

    Steps, in order:
      * while ``self.in_Script`` is set, the raw start-tag text is passed to
        ``handle_data`` and nothing else happens;
      * if ``self.endearly`` is set, the tag is ignored;
      * otherwise a ``DOMObject`` is created, its attributes are set, event
        attributes are passed to ``emulate_timeout``, the tag's offsets in
        ``self.html`` are recorded, and the node is appended to the element
        on top of ``self.DOM_stack`` and then pushed onto that stack.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed; confirm that the ``script``,
    ``retrieval_all`` and offset sections are meant to be top-level steps.

    Args:
        tag: Tag name; it is compared against lowercase literals such as
            'script', 'div' and 'body'.
        attrs: Iterable of ``(name, value)`` attribute pairs.

    Returns:
        None.
    """
    # Markup encountered inside a script is handed on as plain data.
    if self.in_Script:
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    domobj = DOMObject(self.__dict__['__window'], tag, self)

    # Sometimes the attribute key is not the real attribute name, so it is
    # translated first. This is the IE way; for Firefox the translation is
    # done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''

    if config.retrieval_all and 'src' in domobj.__dict__:
        src = self.__dict__['__window'].document.location.fix_url(
            domobj.src)
        # The fetched values are not used further in this method; the call
        # is kept for whatever effect hc.get has (presumably retrieval or
        # logging of the resource -- confirm against hc).
        script, headers = hc.get(
            src, self.__dict__['__window'].document.location.href)

    # Best effort: record where the element's content starts and ends in the
    # raw HTML. A failed lookup (e.g. no closing tag) leaves whatever was
    # already recorded and carries on, exactly as before.
    try:
        # Lowercase once and search from an offset instead of lowercasing
        # and slicing the whole document for every lookup.
        lowered = self.html.lower()
        tag_start = lowered.index('<' + tag, self.current)
        self.current = lowered.index('>', tag_start) + 1

        domobj.__dict__['begin'] = self.current
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[
                domobj.__dict__['begin']:domobj.__dict__['end']]
    except Exception:
        # Deliberately ignored; unlike a bare ``except`` this still lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        self.__dict__['__window'].__dict__['__fl'].append(domobj)

    if tag in ('br', 'meta'):
        self.unknown_endtag(tag)  # <br> and <meta> have no end tag.

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        # An <option> with no usable preceding <select> is skipped silently.
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            pass
def createElement(self, type):
    """Create an element of the given type and return it.

    A ``DOMObject`` is constructed for ``self.contentWindow`` with the
    requested type and no parser. The value returned is the last entry of
    ``self.all``, not the constructed object itself.

    NOTE(review): this presumably relies on the DOMObject constructor
    registering the new node at the end of ``self.all`` -- confirm against
    DOMObject.

    Syntax:
        element = element.createElement(type)

    Args:
        type: String naming the type of element to create.

    Returns:
        The last item of ``self.all``.
    """
    # The constructed object is intentionally not kept; see the note above.
    DOMObject(self.contentWindow, type, None)
    newest = self.all[-1]
    return newest
def __init__(self):
    """Seed the instance from ``self.inits``, then run DOMObject's initialiser.

    The ``self.inits`` mapping is merged directly into the instance
    ``__dict__`` before ``DOMObject.__init__`` is called with
    ``self.window``, ``self.tagName`` and ``self.parser``.

    NOTE(review): those three attributes are presumably supplied by
    ``inits`` or by class attributes -- confirm against the subclasses.
    """
    instance_state = self.__dict__
    instance_state.update(self.inits)
    DOMObject.__init__(self, self.window, self.tagName, self.parser)