def __setattr__(self, name, val):
    """Set attribute ``name``, with extra processing for event attributes.

    Steps, in order:

    1. A ``handle_<name>`` attribute is looked up on the object and called
       with ``(name, val)``.  Any exception raised by the lookup or by the
       call is ignored.
    2. If ``dataetc.isevent(name, self.tagName)`` is true, ``val`` is turned
       into a string, terminated with ``;``, passed through the window
       context's ``patch_script`` and handed to ``cx.execute`` wrapped as
       ``function(){...}``.  When the object has an ``id`` entry in its
       ``__dict__``, every standalone ``this`` identifier in the script is
       first replaced by that id.
    3. ``val`` is stored in ``self.__dict__[name]``.

    :param name: attribute name being assigned.
    :param val: value to assign; for event attributes, the handler source.
    """
    try:
        handler = getattr(self, "handle_%s" % (name, ))
        handler(name, val)
    except Exception:
        # Best effort: a missing hook (AttributeError) or a failing hook
        # must not prevent the assignment below.
        pass

    # If it's an event, let it be a function.
    if dataetc.isevent(name, self.tagName):
        # Using 'this' in handler source may cause additional problems, so
        # it is rewritten to the element id.  There could still be cases
        # this textual substitution cannot cover.
        val = str(val) + ';'
        cx = self.__dict__['__window'].__dict__['__cx']
        val = cx.patch_script(val)
        try:
            if 'id' in self.__dict__:
                # Match 'this' only as a whole identifier: not preceded and
                # not followed by an identifier character.  The lookarounds
                # also succeed at the very start/end of the script, so a
                # leading, trailing or lone 'this' is replaced as well.
                this_token = r'(?<![a-zA-Z0-9_])this(?![a-zA-Z0-9_])'
                val = self.id.join(re.split(this_token, val))
            self.__dict__[name] = cx.execute('function(){' + val + '}')
        except Exception:
            try:
                # Retry with backslash escapes decoded (Python 2
                # 'string-escape' codec).
                p = val.decode('string-escape')
                self.__dict__[name] = cx.execute('function(){' + p + '}')
            except Exception:
                # Give up on compiling; dump the offending script.
                print(val)
                #traceback.print_exc()

    # NOTE(review): the original indentation of this statement was lost.
    # Placed here it runs for every attribute, which also replaces the
    # compiled handler stored above with the patched script text --
    # confirm this is the intended nesting.
    self.__dict__[name] = val
def unknown_starttag(self, tag, attrs):
    """Create a DOMObject for an opening tag and push it on the DOM stack.

    While ``self.in_Script`` is set, the raw start-tag text is forwarded to
    ``handle_data`` and nothing else happens.  Nothing is built either when
    ``self.endearly`` is set.  Otherwise the method:

    * creates a ``DOMObject`` and applies each attribute through
      ``dataetc.attrTrans`` / ``setAttribute``, calling ``emulate_timeout``
      for attributes that ``dataetc.isevent`` reports as events;
    * optionally fetches the ``src`` resource (``config.retrieval_all``);
    * records the tag's content span in ``self.html`` as ``begin``/``end``
      in the object's ``__dict__`` and advances ``self.current``;
    * appends the object to the current top of ``self.DOM_stack`` and pushes
      it, with extra bookkeeping for form, br, meta, select and option.

    :param tag: tag name, compared against lowercase literals.
    :param attrs: iterable of ``(name, value)`` attribute pairs.
    """
    if self.in_Script:
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    window = self.__dict__['__window']
    domobj = DOMObject(window, tag, self)

    # Sometimes the name in the tag is not really the attribute name, so a
    # transform is needed.  Note that this is the IE way; in Firefox the
    # transform is done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''

    # NOTE(review): the original indentation was lost; this fetch is assumed
    # to apply to every tag carrying a src, not only <script> -- confirm.
    if config.retrieval_all and 'src' in domobj.__dict__:
        src = window.document.location.fix_url(domobj.src)
        # The response is not used here; the request itself is the point.
        _script, _headers = hc.get(src, window.document.location.href)

    try:
        # Lowercase the document once and search with start offsets rather
        # than re-lowercasing and slice-copying it for every lookup.
        lowered = self.html.lower()
        tag_at = lowered.index('<' + tag, self.current)
        # Content starts right after the '>' closing the start tag.
        self.current = lowered.index('>', tag_at) + 1
        domobj.__dict__['begin'] = self.current
        # The first closing tag of the same name ends the span.
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[domobj.__dict__['begin']:
                                         domobj.__dict__['end']]
    except Exception:
        # Best effort: a tag that cannot be located in the raw text (or an
        # innerHTML assignment that fails) just leaves the span unset.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        window.__dict__['__fl'].append(domobj)

    if tag == 'br' or tag == 'meta':
        # <br> and <meta> have no end tag.
        self.unknown_endtag(tag)

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            # e.g. no <select> has been recorded yet; the option is skipped.
            pass
def unknown_starttag(self, tag, attrs):
    """Create a DOMObject for an opening tag and push it on the DOM stack.

    While ``self.in_Script`` is set, the raw start-tag text is forwarded to
    ``handle_data`` and nothing else happens.  Nothing is built either when
    ``self.endearly`` is set.  Otherwise the method:

    * creates a ``DOMObject`` and applies each attribute through
      ``dataetc.attrTrans`` / ``setAttribute``, calling ``emulate_timeout``
      for attributes that ``dataetc.isevent`` reports as events;
    * optionally fetches the ``src`` resource (``config.retrieval_all``);
    * records the tag's content span in ``self.html`` as ``begin``/``end``
      in the object's ``__dict__`` and advances ``self.current``;
    * appends the object to the current top of ``self.DOM_stack`` and pushes
      it, with extra bookkeeping for form, br, meta, select and option.

    :param tag: tag name, compared against lowercase literals.
    :param attrs: iterable of ``(name, value)`` attribute pairs.
    """
    if self.in_Script:
        self.handle_data(self.get_starttag_text())
        return

    config.VERBOSE(config.VERBOSE_DEBUG,
                   "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return

    window = self.__dict__['__window']
    domobj = DOMObject(window, tag, self)

    # Sometimes the name in the tag is not really the attribute name, so a
    # transform is needed.  Note that this is the IE way; in Firefox the
    # transform is done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)

    if tag == 'script':
        domobj.__dict__['script'] = ''

    # NOTE(review): the original indentation was lost; this fetch is assumed
    # to apply to every tag carrying a src, not only <script> -- confirm.
    if config.retrieval_all and 'src' in domobj.__dict__:
        src = window.document.location.fix_url(domobj.src)
        # The response is not used here; the request itself is the point.
        _script, _headers = hc.get(src, window.document.location.href)

    try:
        # Lowercase the document once and search with start offsets rather
        # than re-lowercasing and slice-copying it for every lookup.
        lowered = self.html.lower()
        tag_at = lowered.index('<' + tag, self.current)
        # Content starts right after the '>' closing the start tag.
        self.current = lowered.index('>', tag_at) + 1
        domobj.__dict__['begin'] = self.current
        # The first closing tag of the same name ends the span.
        domobj.__dict__['end'] = lowered.index('</' + tag, self.current)
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[domobj.__dict__['begin']:
                                         domobj.__dict__['end']]
    except Exception:
        # Best effort: a tag that cannot be located in the raw text (or an
        # innerHTML assignment that fails) just leaves the span unset.
        pass

    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)

    if tag == 'form':
        window.__dict__['__fl'].append(domobj)

    if tag == 'br' or tag == 'meta':
        # <br> and <meta> have no end tag.
        self.unknown_endtag(tag)

    if tag == 'select':
        self.lastselect = domobj
    if tag == 'option':
        try:
            self.lastselect.options.append(domobj)
        except Exception:
            # e.g. no <select> has been recorded yet; the option is skipped.
            pass
def __window_addEventListener(self, type, listener, useCapture=False):
    """Store ``listener`` as the window's handler for event ``type``.

    Nothing happens unless ``dataetc.isevent(type, 'window')`` is true.
    The listener is written to the instance ``__dict__`` under the event
    name, replacing any value already stored there.

    :param type: event name used as the ``__dict__`` key.
    :param listener: object to store as the handler.
    :param useCapture: accepted for call compatibility; not used.
    """
    if not dataetc.isevent(type, 'window'):
        return
    self.__dict__[type] = listener
def __window_attachEvent(self, sEvent, fpNotify):
    """Store ``fpNotify`` as the window's handler for event ``sEvent``.

    The handler is written to the instance ``__dict__`` under the event
    name, replacing any value already stored there, but only when
    ``dataetc.isevent(sEvent, 'window')`` is true; otherwise the call is a
    no-op.

    :param sEvent: event name used as the ``__dict__`` key.
    :param fpNotify: object to store as the handler.
    """
    is_window_event = dataetc.isevent(sEvent, 'window')
    if is_window_event:
        self.__dict__[sEvent] = fpNotify