def __window_eval(self, script):
    """Evaluate ``script`` in this window's script context.

    A value whose exact type is not one of ``types.StringTypes`` is handed
    back unchanged.  A string is first run through the context's
    ``patch_script`` and the patched text is then executed; the result of
    that execution is returned.

    If looking up the context, patching or executing raises anything, the
    error is swallowed here, a non-empty ``script`` is remembered under
    ``self.__dict__['__lastscript']`` and the method falls through
    (returning None).
    """
    config.VERBOSE(config.VERBOSE_DEBUG, "[DEBUG] Got eval, evaling...")
    config.VERBOSE(config.VERBOSE_DETAIL, str(script))

    # Exact type match on purpose: subclasses of the string types are
    # treated like any other non-string value.
    if type(script) not in types.StringTypes:
        return script

    try:
        context = self.__dict__['__cx']
        return context.execute(context.patch_script(script))
    except:
        # traceback.print_exc()
        if script:
            self.__dict__['__lastscript'] = script
def start_object(self, attrs):
    """Handle an opening ``<object>`` tag.

    The attribute list is normalised with ``self.object_fix`` and an
    ``ActiveXObject`` is created from the ``classid`` attribute.  When no
    usable object results (no ``classid``, a ``UserWarning`` was raised, or
    the object is falsy) the tag is logged, ``self.ignoreObj`` is set and
    nothing else happens.

    Otherwise every attribute is copied onto the object under the name
    returned by ``dataetc.attrTrans(key, 'object')``.  For ``id`` / ``name``
    attributes the object is additionally registered as a script global
    (only when ``typeof <value>`` is "undefined" in the window's context)
    and set as an attribute on the last entry of the window's ``__fl``
    list.  Finally the object is appended to the current top of
    ``self.DOM_stack`` and pushed onto that stack.
    """
    attrs = self.object_fix(attrs)

    activex = None
    for key, value in attrs:
        if key != 'classid':
            continue
        try:
            activex = ActiveXObject(value, 'id')
        except UserWarning:
            pass

    # An ActiveX object may be initialized by classid or by class name.
    # When it is created through an 'object' tag the classid is used; see
    # the class definition in 'ActiveX/ActiveX.py' for more.
    if not activex:
        config.VERBOSE(
            config.VERBOSE_DEBUG,
            "[DEBUG] PageParser.py: Ignoring start_object attrs: " + str(attrs))
        self.ignoreObj = True
        return

    for key, value in attrs:
        if key in ('id', 'name'):
            window_vars = self.__dict__['__window'].__dict__
            context = window_vars['__cx']
            if context.execute('typeof ' + value + ' == "undefined"'):
                context.add_global(value, activex)
            window_vars['__fl'][-1].__setattr__(value, activex)
        activex.__setattr__(dataetc.attrTrans(key, 'object'), value)

    parent = self.DOM_stack[-1]
    parent.appendChild(activex)
    self.DOM_stack.append(activex)
def unknown_starttag(self, tag, attrs):
    """Create a DOMObject for an opening tag and push it on the DOM stack.

    While ``self.in_Script`` is set the raw start-tag text is forwarded to
    ``handle_data`` instead, and nothing is created.  Nothing is created
    either once ``self.endearly`` is set.

    For a regular tag the method:
      * copies every attribute onto the new object (name translated by
        ``dataetc.attrTrans``) and passes event attributes to
        ``emulate_timeout``;
      * fetches the ``src`` resource when ``config.retrieval_all`` is set
        (the fetched data is not used in this method);
      * tries to locate the tag in ``self.html`` to record the ``begin`` /
        ``end`` offsets of its content, advancing ``self.current``;
      * appends the object to the current top of ``self.DOM_stack`` and
        pushes it;
      * does per-tag bookkeeping for ``form``, ``br``/``meta``, ``select``
        and ``option``.
    """
    if self.in_Script:
        # Markup seen inside a script is handed on as plain data.
        self.handle_data(self.get_starttag_text())
        return
    config.VERBOSE(config.VERBOSE_DEBUG, "[DEBUG] [PageParser.py] Tag: " + tag)
    if self.endearly:
        return
    domobj = DOMObject(self.__dict__['__window'], tag, self)
    # Sometimes the attribute name given in the tag is not the real
    # attribute name, so it is translated first.  Note that this is the IE
    # way; in Firefox the translation is done in DOMObject.setAttribute().
    for name, value in attrs:
        domobj.setAttribute(dataetc.attrTrans(name, tag), value)
        if dataetc.isevent(name.lower(), tag):
            self.emulate_timeout(name, value)
    if tag == 'script':
        # Start with an empty script body on the new object.
        domobj.__dict__['script'] = ''
    if config.retrieval_all:
        if 'src' in domobj.__dict__:
            src = self.__dict__['__window'].document.location.fix_url(
                domobj.src)
            # The result is only bound to locals here; neither name is read
            # again in this method.
            script, headers = hc.get(
                src, self.__dict__['__window'].document.location.href)
            # Disabled:
            # if config.replace_nonascii:
            #     script = re.sub('[\x80-\xff]',' ',script)
    try:
        # Search the lower-cased document from the current position for
        # '<' + tag (presumably tag is already lower case -- confirm).
        begin = self.html.lower()[self.current:].index('<' + tag)
        start = self.current + begin
        # Distance from the old position to just past the closing '>' of
        # this start tag.
        offset = begin + self.html.lower()[start:].index('>') + 1
        # NOTE: self.current is advanced before the '</tag' lookup below, so
        # it stays advanced even when that lookup fails and the exception
        # is swallowed.
        self.current += offset
        domobj.__dict__['begin'] = self.current
        domobj.__dict__['end'] = self.current + self.html.lower(
        )[self.current:].index('</' + tag)
        # Only <div> tags that carry attributes and <body> get innerHTML.
        if (tag == 'div' and attrs) or tag == 'body':
            domobj.innerHTML = self.html[domobj.__dict__['begin']:domobj.
                                         __dict__['end']]
    except:
        # Any failure while locating the tag text is ignored; whichever of
        # 'begin' / 'end' / innerHTML was not reached is simply left unset.
        pass
    self.DOM_stack[-1].appendChild(domobj)
    self.DOM_stack.append(domobj)
    if tag == 'form':
        # Forms are also recorded in the window's '__fl' list.
        self.__dict__['__window'].__dict__['__fl'].append(domobj)
    if tag == 'br' or tag == 'meta':
        self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
    if tag == 'select':
        # Remembered so that following <option> tags can attach to it.
        self.lastselect = domobj
    if tag == 'option':
        try:
            self.lastselect.options.append(domobj)
        except:
            # Failing to attach the option (e.g. no usable lastselect) is
            # ignored.
            pass
def start_script(self, attrs):
    """Handle an opening ``<script>`` tag.

    A ``language`` attribute whose value does not start with "javascript"
    (case-insensitive) makes the whole tag ignored: it is logged,
    ``self.ignoreScript`` is set and the method returns.

    Otherwise the tag is processed by ``unknown_starttag`` and the parser
    is switched to script mode (``in_Script`` set, ``literal`` = 1).  If
    the element on top of the DOM stack has a ``src`` entry, the script is
    fetched, optionally stripped of bytes 0x80-0xff
    (``config.replace_nonascii``), appended to the element's ``script``
    text, the element is added to the window's ``__sl`` list and
    ``end_script`` is called immediately.  Without ``src`` the element is
    only added to ``__sl``.
    """
    for key, value in attrs:
        if key.lower() != 'language':
            continue
        if not value.lower().startswith('javascript'):
            config.VERBOSE(
                config.VERBOSE_DEBUG,
                "[DEBUG] in PageParser.py: Ignoring(ignoreScript) start_object attrs: "
                + str(attrs))
            self.ignoreScript = True
            return

    self.unknown_starttag('script', attrs)
    self.in_Script = True
    self.literal = 1

    window = self.__dict__['__window']
    node = self.DOM_stack[-1]

    if 'src' not in node.__dict__:
        # Inline script: its text arrives later as data.
        window.__dict__['__sl'].append(node)
        return

    # External script: fetch it now and finish the element right away.
    src = window.document.location.fix_url(node.src)
    script, _headers = hc.get(src, window.document.location.href)
    if config.replace_nonascii:
        script = re.sub('[\x80-\xff]', ' ', script)
    node.__dict__['script'] += script
    # self.literal = 0
    window.__dict__['__sl'].append(node)
    self.end_script()
    return
def __setattr__(self, name, val):
    """Report an attribute assignment at debug verbosity.

    The code shown here only logs ``name`` and ``val``; it does not store
    the value on the instance.
    """
    message = "[DEBUG] in unknown.py: Attr %s set to: %s" % (name, val)
    config.VERBOSE(config.VERBOSE_DEBUG, message)
def __fetch(self, url, method="get", post_data=False, referrer=False):
    """Hidden worker, called from get() or post().

    * ``file`` URLs are read straight from disk: everything after the first
      seven characters of ``url`` is taken as the path, and the special
      name ``about:blank`` yields an empty string.
    * With ``config.cache_response`` set, a page cached under the key
      ``url + method + str(post_data) + str(referrer)`` is returned and
      ``self.headers`` is restored from the cache entry.
    * Otherwise the URL is unquoted, given a trailing "/" when it has none
      after position 8, logged, recorded through ``__saveurl`` and a pycurl
      handle is configured for the request.

    ``self.headers`` is reset to '' on entry.
    """
    # TODO
    #   http_proxy=http://localhost:8118 <-- not needed yet
    #   auto-follow location headers (see curl -e)
    self.headers = ''

    if urlparse.urlsplit(url)[0].lower() == 'file':
        local_path = url[7:]
        if local_path == "about:blank":
            return ''
        handle = open(local_path, 'r')
        contents = handle.read()
        handle.close()
        return contents

    if config.cache_response:
        cache_key = url + method + str(post_data) + str(referrer)
        if self.pagecache.has_key(cache_key):
            cached = self.pagecache[cache_key]
            self.headers = cached[1]
            return cached[0]

    # Python 2 only: re-expose sys.setdefaultencoding and switch the
    # process-wide default encoding to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    body_sink = ReadCallback()
    url = urllib2.unquote(url)
    if "/" not in url[8:]:
        url += "/"
    config.VERBOSE(config.VERBOSE_REFGRAPH,
                   '[REFGRAPH] "' + str(referrer) + '"->"' + url + '"')
    self.__saveurl(url)

    c = pycurl.Curl()
    for option, value in (
            (pycurl.FOLLOWLOCATION, 1),
            (pycurl.AUTOREFERER, 1),
            # (pycurl.MAXREDIRS, 200),
            (pycurl.URL, str(url)),
            (pycurl.WRITEFUNCTION, body_sink.body_cb),
            (pycurl.USERAGENT, self.ua),
            # (pycurl.VERBOSE, 1),
            (pycurl.HEADERFUNCTION, self.header),
            (pycurl.CONNECTTIMEOUT, 30),
            (pycurl.TIMEOUT, 30),
    ):
        c.setopt(option, value)

    if post_data and method.lower() == "post":
        c.setopt(pycurl.UPLOAD, 1)
        c.setopt(pycurl.READFUNCTION, post_data)
        c.setopt(pycurl.INFILESIZE, len(post_data))

    if referrer:
        try:
            c.setopt(pycurl.REFERER, str(referrer))
        except Exception:
            traceback.print_exc()
def write(self, text):
    """
    Writes a string of text to a document stream.

    Syntax

    document.write(text)

    Parameters

    text is a string containing the text to be written to the current
    document.

    The text is appended to the list kept under
    ``self.__dict__['__dynamic']``; the concatenation of everything in that
    list (not just the new piece) is then handed to a new ``PageParser``
    together with the content window and the last entry of the window's
    ``__sl`` list.
    """
    config.VERBOSE(config.VERBOSE_DEBUG,
                   '[DEBUG] in Document.py Document.write(ln)...')
    config.VERBOSE(config.VERBOSE_DETAIL, str(text))

    chunks = self.__dict__['__dynamic']
    chunks.append(text)
    content = ''.join(chunks)

    window = self.contentWindow
    latest_script = window.__dict__['__sl'][-1]
    parser = PageParser(window, latest_script, content, True)
def handle_src(self, name, val):
    """React to a ``src``-like attribute ``name`` being given value ``val``.

    The value is resolved with the document location's ``fix_url``.  With
    ``config.retrieval_all`` set the resource is fetched (the result is
    discarded).  A URL whose scheme is not http, file, https or ftp is
    reported as a warning, and the element's ``onerror`` handler is called
    when one is present in ``self.__dict__``.

    For an ``iframe`` element a new ``Window`` is created for the resolved
    URL (referrer: the current document's href), its HTML is run through a
    ``PageParser`` and the parser is closed.
    """
    owner = self.__dict__['__window']
    url = owner.document.location.fix_url(val)

    if config.retrieval_all:
        hc.get(url, owner.document.location.href)

    scheme = urlparse.urlsplit(url)[0]
    if scheme not in ('http', 'file', 'https', 'ftp'):
        config.VERBOSE(
            config.VERBOSE_WARNING,
            "[WARNING] Got unknown scheme: %s in %s.%s ." % (url, self.tagName, name))
        if 'onerror' in self.__dict__:
            config.VERBOSE(
                config.VERBOSE_DEBUG,
                "[DEBUG] Calling onerror of %s." % (self.tagName))
            self.onerror()

    if self.tagName != "iframe":
        return

    # Imported locally, as in the original code.
    from Window import Window
    from PageParser import PageParser

    child = Window(owner.__dict__['__root'],
                   owner.document.location.fix_url(val),
                   owner.document.location.href)
    child_parser = PageParser(child, child.document, child.__dict__['__html'])
    child_parser.close()
def write_log(filename):
    """Write every entry of ``eventlist`` to ``log/<filename>``, one per line.

    When ``eventlist`` is empty only a debug message is emitted and no file
    is created.  Writing is best effort: an ``IOError`` does not propagate
    to the caller, but it is reported as a warning instead of being
    dropped silently.

    Parameters
    filename -- name of the log file, relative to the ``log/`` directory.
    """
    if not eventlist:
        config.VERBOSE(config.VERBOSE_DEBUG,
                       '[DEBUG] in ActiveX.py: No ActiveXObject found.')
        return

    path = 'log/' + filename
    try:
        fd = open(path, 'wb')
        try:
            for log in eventlist:
                fd.write(log + '\n')
        finally:
            # Close the handle even when a write fails part-way through.
            fd.close()
        print('Log written into: ' + path)
    except IOError:
        config.VERBOSE(config.VERBOSE_WARNING,
                       '[WARNING] in ActiveX.py: Could not write log: ' + path)
def __window_alert(self, text):
    """
    Display an alert dialog with the specified text.

    Syntax

    window.alert(text)

    Parameters

    text is a string of the text you want displayed in the alert dialog.

    Here the "dialog" is emulated: the text is printed to standard output
    and also logged at debug verbosity.
    """
    message = str(text)
    print(message)
    config.VERBOSE(config.VERBOSE_DEBUG, '[DEBUG] alertmsg: ' + message)
def download(url):
    """Download ``url`` into ``BINARIES_DIR``, named by the MD5 of its content.

    The payload is first written to ``BINARIES_DIR/<md5 of url>``.  The
    temporary file is deleted when the server answers 404 or when nothing
    was received; otherwise it is renamed to ``BINARIES_DIR/<md5 of file
    content>``.

    Errors raised during the transfer itself are printed to stderr and do
    not propagate; whatever was written so far is then handled like a
    normal download.  Returns None in every case.

    Parameters
    url -- address of the resource to fetch.
    """
    url_digest = hashlib.md5()
    url_digest.update(url)
    filename = "%s/%s" % (BINARIES_DIR, url_digest.hexdigest())

    not_found = False
    fd = open(filename, 'wb')
    try:
        c = pycurl.Curl()
        try:
            c.setopt(pycurl.FOLLOWLOCATION, 1)
            c.setopt(pycurl.URL, str(url))
            c.setopt(pycurl.WRITEDATA, fd)
            c.setopt(pycurl.USERAGENT, config.userAgent)
            try:
                c.perform()
                not_found = c.getinfo(pycurl.HTTP_CODE) == 404
            except Exception:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
        finally:
            # Release the curl handle on every path, including 404.
            c.close()
    finally:
        fd.close()

    if not_found:
        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] 404 File Not Found: " + url)
        os.remove(filename)
        return

    if not os.stat(filename).st_size:
        # Nothing was received.
        os.remove(filename)
        return

    # Hash the payload in binary mode so the digest covers the exact bytes
    # on disk, and close the file before moving it.
    fd = open(filename, 'rb')
    try:
        content_digest = hashlib.md5()
        content_digest.update(fd.read())
    finally:
        fd.close()

    newfilename = "%s/%s" % (BINARIES_DIR, content_digest.hexdigest())
    shutil.move(filename, newfilename)
def __init_lastmodified(self, header):
    """Set ``self.lastModified`` from a ``Last-Modified:`` header line.

    The text after ``Last-Modified:`` is parsed as an HTTP date, accepting
    the timestamp with or without a space before ``GMT``.  On success
    ``self.lastModified`` becomes ``MM/DD/YYYY HH:MM:SS``.  When neither
    layout matches, a warning is logged and ``self.lastModified`` is set to
    the empty string.

    Parameters
    header -- header text; it must contain ``Last-Modified:``.
    """
    stamp = header.split("Last-Modified:")[1].strip()

    layouts = (
        "%a, %d %b %Y %H:%M:%S GMT",
        "%a, %d %b %Y %H:%M:%SGMT",
    )
    for layout in layouts:
        try:
            parsed = time.strptime(stamp, layout)
        except ValueError:
            continue
        break
    else:
        # No layout matched.
        config.VERBOSE(
            config.VERBOSE_WARNING,
            '[WARNING] Error while parsing lastModified [Document.py]')
        self.lastModified = ''
        return

    self.lastModified = "%.2d/%.2d/%.4d %.2d:%.2d:%.2d" % (
        parsed.tm_mon, parsed.tm_mday, parsed.tm_year,
        parsed.tm_hour, parsed.tm_min, parsed.tm_sec)
def __init__(self, cls, clstype='name'):
    """Create the emulated ActiveX object identified by ``cls``.

    Parameters
    cls     -- class id (optionally prefixed with ``clsid:``) or class
               name, depending on ``clstype``.
    clstype -- ``'id'`` to look ``cls`` up in ``clsidlist``; any other
               value (default ``'name'``) to look it up in ``clsnamelist``.

    The matching file name ('' when there is no match) is stored under
    ``self.__dict__['__name']``.  Unless ``config.universal_activex`` is
    set, ``check_raise_warning`` is called with the file name and class.
    When a file name was found, the source returned by
    ``load_src(filename)`` is executed in this method's scope.
    """
    config.VERBOSE(config.VERBOSE_WARNING,
                   "[WARNING] New ActiveX Object: " + cls)
    unknownObject.__init__(self, cls)

    filename = ''
    if clstype == 'id':
        # Match the "clsid:" prefix in any letter case (e.g. "ClsId:"), not
        # only in all-lower or all-upper form; the remainder is upper-cased
        # for the table lookup.
        if cls[:6].lower() == 'clsid:':
            cls = cls[6:].upper()
        if cls in clsidlist:
            filename = clsidlist[cls]
    elif cls in clsnamelist:
        filename = clsnamelist[cls]

    self.__dict__['__name'] = filename
    # config.VERBOSE(config.VERBOSE_WARNING, config.universal_activex)
    if not config.universal_activex:
        self.check_raise_warning(filename, cls)
    if filename:
        # NOTE(review): this executes whatever load_src returns; the file
        # name comes from the lookup tables above, not directly from the
        # page -- confirm load_src only reads trusted files.
        exec(load_src(filename))
def add_alert(alert):
    """Log ``alert`` at default verbosity, prefixed with ``[ALERT] ``."""
    message = '[ALERT] ' + alert
    config.VERBOSE(config.VERBOSE_DEFAULT, message)