Exemple #1
0
    def __window_eval(self, script):
        """Emulate ``window.eval``: run ``script`` in this window's JS context.

        Values that are not strings are returned unchanged. Strings are
        passed through the context's ``patch_script`` and then executed; the
        result of the execution is returned.

        Any failure while executing is swallowed: the method then returns
        ``None`` and, when ``script`` is non-empty, remembers it under the
        ``'__lastscript'`` key of the instance dictionary.
        """
        config.VERBOSE(config.VERBOSE_DEBUG, "[DEBUG] Got eval, evaling...")
        config.VERBOSE(config.VERBOSE_DETAIL, str(script))

        if type(script) not in types.StringTypes:
            return script

        try:
            js_context = self.__dict__['__cx']
            return js_context.execute(js_context.patch_script(script))
        except:
            # Deliberately broad: a broken script must not abort the caller.
            if script:
                self.__dict__['__lastscript'] = script
        return None
    def start_object(self, attrs):
        """Handle an ``<object>`` start tag by building an ActiveX object.

        ``attrs`` is first normalised with ``self.object_fix``. Every
        ``classid`` attribute is used to construct an ``ActiveXObject``; when
        none could be built the tag is ignored and ``self.ignoreObj`` is set.

        Otherwise each ``id``/``name`` value is published as a JS global (only
        if the JS context reports it as undefined) and set on the last entry
        of the window's ``'__fl'`` list, every attribute is copied onto the
        object, and the object is appended to the DOM and pushed on
        ``self.DOM_stack``.
        """
        attrs = self.object_fix(attrs)
        activex = None

        # An ActiveX object may be initialised by classid or by class name.
        # When created by an 'object' tag the classid is used. See the class
        # definition in 'ActiveX/ActiveX.py' for more.
        for attr_name, attr_value in attrs:
            if attr_name != 'classid':
                continue
            try:
                activex = ActiveXObject(attr_value, 'id')
            except UserWarning:
                # Construction was refused; keep any object built earlier.
                pass

        if not activex:
            config.VERBOSE(
                config.VERBOSE_DEBUG,
                "[DEBUG] PageParser.py: Ignoring start_object attrs: " +
                str(attrs))
            self.ignoreObj = True
            return

        for attr_name, attr_value in attrs:
            if attr_name in ('id', 'name'):
                window_state = self.__dict__['__window'].__dict__
                js_context = window_state['__cx']
                undefined_check = 'typeof ' + attr_value + ' == "undefined"'
                if js_context.execute(undefined_check):
                    js_context.add_global(attr_value, activex)
                window_state['__fl'][-1].__setattr__(attr_value, activex)
            translated = dataetc.attrTrans(attr_name, 'object')
            activex.__setattr__(translated, attr_value)

        parent = self.DOM_stack[-1]
        parent.appendChild(activex)
        self.DOM_stack.append(activex)
    def unknown_starttag(self, tag, attrs):
        """Create a DOM object for a start tag and push it on the DOM stack.

        While ``self.in_Script`` is set, the raw start-tag text is forwarded
        to ``handle_data`` instead and nothing else happens. Nothing is built
        either once ``self.endearly`` is set.

        Otherwise a ``DOMObject`` is created, its attributes are set (event
        attributes are additionally handed to ``emulate_timeout``), the
        element's ``begin``/``end`` offsets into ``self.html`` are recorded on
        a best-effort basis, and the object is appended to the current parent
        and pushed on ``self.DOM_stack``.

        Args:
            tag: tag name; compared against lower-case literals below.
            attrs: iterable of ``(name, value)`` pairs.
        """
        if self.in_Script:
            # Inside a script element: treat the tag text as script data.
            self.handle_data(self.get_starttag_text())
            return

        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] [PageParser.py] Tag: " + tag)
        if self.endearly:
            return

        domobj = DOMObject(self.__dict__['__window'], tag, self)
        # Sometimes the attribute key in the tag is not the real attribute
        # name, so it is translated first. Note that this is the IE way; for
        # Firefox the translation is done in DOMObject.setAttribute().
        for name, value in attrs:
            domobj.setAttribute(dataetc.attrTrans(name, tag), value)
            if dataetc.isevent(name.lower(), tag):
                self.emulate_timeout(name, value)

        if tag == 'script':
            domobj.__dict__['script'] = ''

        if config.retrieval_all:
            if 'src' in domobj.__dict__:
                # The resource is fetched but the result is not used in this
                # method (script and headers are never read afterwards).
                src = self.__dict__['__window'].document.location.fix_url(
                    domobj.src)
                script, headers = hc.get(
                    src, self.__dict__['__window'].document.location.href)
                # if config.replace_nonascii:
                #     script = re.sub('[\x80-\xff]',' ',script)

        try:
            # Locate this tag in the lower-cased document, searching from
            # self.current, and advance self.current to just past its '>'.
            begin = self.html.lower()[self.current:].index('<' + tag)
            start = self.current + begin
            offset = begin + self.html.lower()[start:].index('>') + 1

            self.current += offset
            # 'begin' is the offset right after the start tag; 'end' is the
            # offset of the next '</tag' found after it.
            domobj.__dict__['begin'] = self.current
            domobj.__dict__['end'] = self.current + self.html.lower(
            )[self.current:].index('</' + tag)

            if (tag == 'div' and attrs) or tag == 'body':
                domobj.innerHTML = self.html[domobj.__dict__['begin']:domobj.
                                             __dict__['end']]
        except:
            # Best effort: any failure (e.g. index() not finding the tag or
            # its end tag) is ignored. self.current and 'begin' may already
            # have been updated when the failure happens.
            pass

        self.DOM_stack[-1].appendChild(domobj)
        self.DOM_stack.append(domobj)

        if tag == 'form':
            self.__dict__['__window'].__dict__['__fl'].append(domobj)
        if tag == 'br' or tag == 'meta':
            self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
        if tag == 'select':
            self.lastselect = domobj
        if tag == 'option':
            try:
                self.lastselect.options.append(domobj)
            except:
                # Ignored on any failure, e.g. when self.lastselect has not
                # been set by a preceding <select>.
                pass
    def start_script(self, attrs):
        """Handle a ``<script>`` start tag.

        A ``language`` attribute whose value does not start with
        ``javascript`` (case-insensitive) makes the parser skip the element:
        ``self.ignoreScript`` is set and nothing else is done.

        Otherwise the element is created through ``unknown_starttag``, the
        parser enters script mode, and the element is registered in the
        window's ``'__sl'`` list. For an element carrying ``src`` the
        referenced script is downloaded first, appended to the element's
        ``'script'`` entry, and ``end_script`` is called immediately.
        """
        for attr_name, attr_value in attrs:
            is_language = attr_name.lower() == 'language'
            if is_language and not attr_value.lower().startswith('javascript'):
                config.VERBOSE(
                    config.VERBOSE_DEBUG,
                    "[DEBUG] in PageParser.py: Ignoring(ignoreScript) start_object attrs: "
                    + str(attrs))
                self.ignoreScript = True
                return

        self.unknown_starttag('script', attrs)
        self.in_Script = True
        self.literal = 1

        script_node = self.DOM_stack[-1]
        window = self.__dict__['__window']

        if 'src' not in script_node.__dict__:
            # Inline script: its body arrives later as character data.
            window.__dict__['__sl'].append(script_node)
            return

        # External script: fetch it now, using the page URL as referrer.
        src = window.document.location.fix_url(script_node.src)
        script, headers = hc.get(src, window.document.location.href)
        if config.replace_nonascii:
            script = re.sub('[\x80-\xff]', ' ', script)
        script_node.__dict__['script'] += script
        window.__dict__['__sl'].append(script_node)
        self.end_script()
Exemple #5
0
 def __setattr__(self, name, val):
     """Log an attribute assignment at debug verbosity.

     As written here the method only emits the log line; it does not store
     ``val`` on the instance.
     """
     message = "[DEBUG] in unknown.py: Attr %s set to: %s" % (name, val)
     config.VERBOSE(config.VERBOSE_DEBUG, message)
    def __fetch(self, url, method="get", post_data=False, referrer=False):
        """Fetch ``url``; hidden helper, called from get() or post().

        ``file`` URLs are read straight from disk and their content is
        returned. When ``config.cache_response`` is set and the request is
        already in ``self.pagecache``, the cached headers are restored into
        ``self.headers`` and the cached first element is returned. Otherwise
        a pycurl handle is configured for the request.

        Args:
            url: address to fetch.
            method: request method name; only ``"post"`` (any case) combined
                with a truthy ``post_data`` changes the curl setup.
            post_data: data for a POST request, or a falsy value for none.
            referrer: referrer to send, or a falsy value for none.

        NOTE(review): the code visible here stops after the referrer is set;
        no ``perform()`` call or final return is shown - confirm against the
        full source.
        """
        # TODO
        # http_proxy=http://localhost:8118  <-- not needed yet
        # auto-follow location headers (see curl -e)

        self.headers = ''
        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
        if scheme.lower() in ('file', ):
            # Drop the first 7 characters (the length of "file://").
            filename = url[7:]
            if filename in [
                    "about:blank",
            ]:
                return ''

            f = file(filename, 'r')
            res = f.read()
            f.close()
            return res

        if config.cache_response:
            # Cache key combines everything that identifies the request.
            hashkey = url + method + str(post_data) + str(referrer)
            if self.pagecache.has_key(hashkey):
                # Cache entries are indexed: [0] is returned, [1] is headers.
                self.headers = self.pagecache[hashkey][1]
                return self.pagecache[hashkey][0]

        # Sets the interpreter-wide default string encoding to UTF-8; this
        # runs on every uncached, non-file fetch.
        reload(sys)
        sys.setdefaultencoding('utf-8')
        _cb = ReadCallback()

        url = urllib2.unquote(url)
        # No "/" at or after index 8: append one. Presumably this targets
        # URLs that consist of scheme and host only - TODO confirm.
        if url.find("/", 8) < 0:
            url += "/"
        config.VERBOSE(config.VERBOSE_REFGRAPH,
                       '[REFGRAPH] "' + str(referrer) + '"->"' + url + '"')

        self.__saveurl(url)
        c = pycurl.Curl()
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.AUTOREFERER, 1)
        #c.setopt(pycurl.MAXREDIRS, 200)
        c.setopt(pycurl.URL, str(url))
        c.setopt(pycurl.WRITEFUNCTION, _cb.body_cb)
        c.setopt(pycurl.USERAGENT, self.ua)
        #c.setopt(pycurl.VERBOSE, 1)
        c.setopt(pycurl.HEADERFUNCTION, self.header)
        c.setopt(pycurl.CONNECTTIMEOUT, 30)
        c.setopt(pycurl.TIMEOUT, 30)

        if post_data and method.lower() == "post":
            # NOTE(review): post_data is passed both as READFUNCTION and to
            # len(); pycurl documents READFUNCTION as a callable - confirm
            # what type callers actually pass here.
            c.setopt(pycurl.UPLOAD, 1)
            c.setopt(pycurl.READFUNCTION, post_data)
            c.setopt(pycurl.INFILESIZE, len(post_data))

        if referrer:
            try:
                c.setopt(pycurl.REFERER, str(referrer))
            except Exception, e:
                # A referrer that cannot be set is reported, not fatal.
                traceback.print_exc()
Exemple #7
0
    def write(self, text):
        """Emulate ``document.write(text)``.

        ``text`` is appended to the list of dynamically written fragments
        kept under the ``'__dynamic'`` key, and the concatenation of all
        fragments so far is handed to a new ``PageParser`` together with the
        content window and the last entry of that window's ``'__sl'`` list.

        Parameters

        text is a string containing the text to be written to the current
        document.
        """
        config.VERBOSE(config.VERBOSE_DEBUG,
                       '[DEBUG] in Document.py Document.write(ln)...')
        config.VERBOSE(config.VERBOSE_DETAIL, str(text))

        fragments = self.__dict__['__dynamic']
        fragments.append(text)
        content = ''.join(fragments)

        window = self.contentWindow
        current_script = window.__dict__['__sl'][-1]
        # The parser object itself is not used further in this method.
        parser = PageParser(window, current_script, content, True)
    def handle_src(self, name, val):
        """React to a ``src``-like attribute ``name`` being set to ``val``.

        The value is resolved with the document location's ``fix_url``. With
        ``config.retrieval_all`` enabled the resource is fetched. A URL whose
        scheme is not http, https, ftp or file triggers a warning and, if the
        element has an ``onerror`` entry, a call to ``self.onerror()``.

        For an ``iframe`` element a new ``Window`` is created for the
        resolved URL and its HTML is parsed with a ``PageParser``.
        """
        owner = self.__dict__['__window']
        url = owner.document.location.fix_url(val)

        if config.retrieval_all:
            hc.get(url, owner.document.location.href)

        scheme = urlparse.urlsplit(url)[0]
        if scheme not in ('http', 'file', 'https', 'ftp'):
            config.VERBOSE(
                config.VERBOSE_WARNING,
                "[WARNING] Got unknown scheme: %s in %s.%s ." % (
                    url, self.tagName, name))
            if 'onerror' in self.__dict__:
                config.VERBOSE(
                    config.VERBOSE_DEBUG,
                    "[DEBUG] Calling onerror of %s." % (self.tagName))
                self.onerror()

        if self.tagName != "iframe":
            return

        # Imported here rather than at module level, as in the original code.
        from Window import Window
        from PageParser import PageParser

        # The location is read again on purpose: it is looked up after any
        # onerror handler has run.
        frame_window = Window(
            owner.__dict__['__root'],
            owner.document.location.fix_url(val),
            owner.document.location.href)
        frame_parser = PageParser(frame_window, frame_window.document,
                                  frame_window.__dict__['__html'])
        frame_parser.close()
Exemple #9
0
def write_log(filename):
    """Write every entry of ``eventlist`` to ``log/<filename>``, one per line.

    Nothing is written when ``eventlist`` is empty. On success a
    confirmation line is printed. An ``IOError`` (for example a missing
    ``log`` directory) does not propagate; it is reported as a warning.

    Args:
        filename: name of the log file, relative to the ``log/`` directory.
    """
    if not eventlist:
        config.VERBOSE(config.VERBOSE_DEBUG, '[DEBUG] in ActiveX.py: No ActiveXObject found.')
        return

    try:
        # The context manager closes the file even when a write fails, which
        # the previous explicit close() did not.
        with open('log/' + filename, 'wb') as fd:
            for log in eventlist:
                fd.write(log + '\n')
        # Single-argument form prints identically as a Python 2 statement
        # and as a Python 3 function call.
        print('Log written into: log/' + filename)
    except IOError:
        # Logging stays best-effort, but the failure is no longer silent.
        config.VERBOSE(config.VERBOSE_WARNING,
                       '[WARNING] in ActiveX.py: Cannot write log/' + filename)
Exemple #10
0
    def __window_alert(self, text):
        """Emulate ``window.alert(text)``.

        No dialog is shown: the string form of ``text`` is printed to
        standard output and then logged at debug verbosity.

        Parameters

        text is the value whose string form is reported as the alert message.
        """
        message = str(text)
        print(message)
        config.VERBOSE(config.VERBOSE_DEBUG, '[DEBUG] alertmsg: ' + message)
Exemple #11
0
def download(url):
    """Download ``url`` into BINARIES_DIR, named after the MD5 of its content.

    The response body is first written to a temporary file named after the
    MD5 of the URL. A 404 response or an empty body removes that file and
    keeps nothing. Otherwise the file is renamed to the MD5 hex digest of
    the downloaded content.

    Transfer errors are printed to stderr and not re-raised; whatever was
    written before the error is still processed as described above.

    Args:
        url: address to fetch; handed to pycurl as ``str(url)``.

    Returns:
        None in every case.
    """
    url_digest = hashlib.md5()
    url_digest.update(url)
    filename = "%s/%s" % (
        BINARIES_DIR,
        url_digest.hexdigest(),
    )

    not_found = False
    fd = open(filename, 'wb')
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.URL, str(url))
        c.setopt(pycurl.WRITEDATA, fd)
        c.setopt(pycurl.USERAGENT, config.userAgent)

        try:
            c.perform()
            not_found = c.getinfo(pycurl.HTTP_CODE) == 404
        except Exception:
            # Report and carry on: a partial download is still examined
            # below. KeyboardInterrupt/SystemExit are no longer trapped.
            import traceback
            traceback.print_exc(file=sys.stderr)
            sys.stderr.flush()
    finally:
        # Release both handles on every path, including the 404 one, which
        # previously returned without closing the curl handle.
        c.close()
        fd.close()

    if not_found:
        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] 404 File Not Found: " + url)
        os.remove(filename)
        return

    if not os.stat(filename).st_size:
        os.remove(filename)
        return

    # Hash in binary mode (text mode can alter bytes on some platforms) and
    # in chunks, so a large download is not loaded into memory at once.
    content_digest = hashlib.md5()
    with open(filename, 'rb') as downloaded:
        for chunk in iter(lambda: downloaded.read(65536), b''):
            content_digest.update(chunk)

    newfilename = "%s/%s" % (
        BINARIES_DIR,
        content_digest.hexdigest(),
    )
    # The file is closed before it is moved.
    shutil.move(filename, newfilename)
Exemple #12
0
    def __init_lastmodified(self, header):
        """Set ``self.lastModified`` from a ``Last-Modified:`` header.

        The text following ``Last-Modified:`` in ``header`` is parsed as an
        HTTP date, with or without a space before ``GMT``. On success
        ``self.lastModified`` becomes ``MM/DD/YYYY HH:MM:SS``; if neither
        format matches, a warning is logged and it becomes ``''``.

        Raises:
            IndexError: if ``header`` does not contain ``Last-Modified:``.
        """
        raw_date = header.split("Last-Modified:")[1].strip()

        accepted_formats = (
            "%a, %d %b %Y %H:%M:%S GMT",
            "%a, %d %b %Y %H:%M:%SGMT",
        )
        parsed = None
        for date_format in accepted_formats:
            try:
                parsed = time.strptime(raw_date, date_format)
            except ValueError:
                continue
            break

        if parsed is None:
            config.VERBOSE(
                config.VERBOSE_WARNING,
                '[WARNING] Error while parsing lastModified [Document.py]')
            self.lastModified = ''
            return

        self.lastModified = "%.2d/%.2d/%.4d %.2d:%.2d:%.2d" % (
            parsed.tm_mon, parsed.tm_mday, parsed.tm_year,
            parsed.tm_hour, parsed.tm_min, parsed.tm_sec)
Exemple #13
0
    def __init__(self, cls, clstype = 'name'):
        """Create an ActiveX object for a class id or a class name.

        Args:
            cls: the class identifier. With ``clstype == 'id'`` a leading
                ``clsid:`` / ``CLSID:`` prefix is removed and the remainder
                upper-cased before the lookup in ``clsidlist``; otherwise
                ``cls`` is looked up in ``clsnamelist``.
            clstype: ``'id'`` to resolve by class id; any other value
                resolves by class name.

        The resolved module file name (``''`` when unknown) is stored under
        the ``'__name'`` key. Unless ``config.universal_activex`` is set,
        ``check_raise_warning`` is called with the result. When a file name
        was found, the source returned by ``load_src`` is executed.
        """
        config.VERBOSE(config.VERBOSE_WARNING, "[WARNING] New ActiveX Object: " + cls)

        unknownObject.__init__(self, cls)

        if clstype != 'id':
            filename = clsnamelist[cls] if cls in clsnamelist else ''
        else:
            if cls[:6] in ('clsid:', 'CLSID:'):
                cls = cls[6:].upper()
            filename = clsidlist[cls] if cls in clsidlist else ''

        self.__dict__['__name'] = filename
        if not config.universal_activex:
            self.check_raise_warning(filename, cls)
        if filename:
            # Runs the loaded module source in this constructor's scope.
            # NOTE(review): presumably load_src only reads bundled module
            # files - confirm it can never return page-controlled code.
            exec(load_src(filename))
Exemple #14
0
def add_alert(alert):
    """Report ``alert`` at the default verbosity level, prefixed with ``[ALERT] ``."""
    message = '[ALERT] ' + alert
    config.VERBOSE(config.VERBOSE_DEFAULT, message)