Example #1
0
    def start_object(self, attrs):
        """Handle an opening ``object`` tag by creating its ActiveX DOM node.

        The attributes are first passed through ``self.object_fix``. An
        ``ActiveXObject`` is then built from the ``classid`` attribute. When
        no object could be built, the tag is logged, ``self.ignoreObj`` is
        set to True and nothing is added to the DOM. Otherwise the object is
        published under its ``id``/``name``, receives every attribute (with
        the name translated by ``dataetc.attrTrans``), is appended to the
        current top of ``self.DOM_stack`` and becomes the new top.
        """
        attrs = self.object_fix(attrs)

        # An ActiveX object may be initialized by classid or by classname.
        # When it is created by an 'object' tag the classid is used. See the
        # class definition in 'ActiveX/ActiveX.py' for more.
        activex = None
        for attr_name, attr_value in attrs:
            if attr_name != 'classid':
                continue
            try:
                activex = ActiveXObject(attr_value, 'id')
            except UserWarning:
                # Creation was refused; keep any object built earlier.
                pass

        if not activex:
            config.VERBOSE(
                config.VERBOSE_DEBUG,
                "[DEBUG] PageParser.py: Ignoring start_object attrs: "
                + str(attrs))
            self.ignoreObj = True
            return

        for attr_name, attr_value in attrs:
            if attr_name in ('id', 'name'):
                window_vars = self.__dict__['__window'].__dict__
                js_context = window_vars['__cx']
                # Only claim the global name if the script context does not
                # already define it.
                if js_context.execute(
                        'typeof ' + attr_value + ' == "undefined"'):
                    js_context.add_global(attr_value, activex)
                # Also expose the object on the last entry of the window's
                # '__fl' list (presumably the innermost form -- confirm).
                window_vars['__fl'][-1].__setattr__(attr_value, activex)
            activex.__setattr__(
                dataetc.attrTrans(attr_name, 'object'), attr_value)

        self.DOM_stack[-1].appendChild(activex)
        self.DOM_stack.append(activex)
Example #2
0
    def start_object(self, attrs):
        """Handle an opening ``object`` tag by creating its ActiveX DOM node.

        The attributes are first passed through ``self.object_fix``. An
        ``ActiveXObject`` is then built from the ``classid`` attribute. When
        no object could be built, the tag is logged, ``self.ignoreObj`` is
        set to True and nothing is added to the DOM. Otherwise the object is
        published under its ``id``/``name``, receives every attribute (with
        the name translated by ``dataetc.attrTrans``), is appended to the
        current top of ``self.DOM_stack`` and becomes the new top.
        """
        attrs = self.object_fix(attrs)

        # An ActiveX object may be initialized by classid or by classname.
        # When it is created by an 'object' tag the classid is used. See the
        # class definition in 'ActiveX/ActiveX.py' for more.
        activex = None
        for attr_name, attr_value in attrs:
            if attr_name != 'classid':
                continue
            try:
                activex = ActiveXObject(attr_value, 'id')
            except UserWarning:
                # Creation was refused; keep any object built earlier.
                pass

        if not activex:
            config.VERBOSE(
                config.VERBOSE_DEBUG,
                "[DEBUG] PageParser.py: Ignoring start_object attrs: "
                + str(attrs))
            self.ignoreObj = True
            return

        for attr_name, attr_value in attrs:
            if attr_name in ('id', 'name'):
                window_vars = self.__dict__['__window'].__dict__
                js_context = window_vars['__cx']
                # Only claim the global name if the script context does not
                # already define it.
                if js_context.execute(
                        'typeof ' + attr_value + ' == "undefined"'):
                    js_context.add_global(attr_value, activex)
                # Also expose the object on the last entry of the window's
                # '__fl' list (presumably the innermost form -- confirm).
                window_vars['__fl'][-1].__setattr__(attr_value, activex)
            activex.__setattr__(
                dataetc.attrTrans(attr_name, 'object'), attr_value)

        self.DOM_stack[-1].appendChild(activex)
        self.DOM_stack.append(activex)
Example #3
0
    def unknown_starttag(self, tag, attrs):
        """Create a DOM node for an opening tag and push it on the DOM stack.

        While ``self.in_Script`` is set the raw tag text is forwarded to
        ``handle_data`` and nothing else happens. The method also returns
        early, after logging the tag, when ``self.endearly`` is set.

        Otherwise a ``DOMObject`` is built for ``tag``, its attributes are set
        (names translated by ``dataetc.attrTrans``), attributes that
        ``dataetc.isevent`` reports as events are handed to
        ``self.emulate_timeout``, and the node's ``begin``/``end`` offsets
        into ``self.html`` are recorded on a best-effort basis. The node is
        appended to the current top of ``self.DOM_stack`` and becomes the new
        top.

        Args:
            tag: Tag name; it is searched for in the lowercased HTML, so it
                is assumed to be lowercase already -- confirm with the caller.
            attrs: Iterable of ``(name, value)`` attribute pairs.
        """
        if self.in_Script:
            self.handle_data(self.get_starttag_text())
            return

        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] [PageParser.py] Tag: " + tag)
        if self.endearly:
            return

        domobj = DOMObject(self.__dict__['__window'], tag, self)
        # Sometimes the key in a tag is not really the attribute name, so a
        # transform is needed. Note that this is the IE way; in Firefox the
        # transform is done in DOMObject.setAttribute().
        for name, value in attrs:
            domobj.setAttribute(dataetc.attrTrans(name, tag), value)
            if dataetc.isevent(name.lower(), tag):
                self.emulate_timeout(name, value)

        if tag == 'script':
            domobj.__dict__['script'] = ''

        if config.retrieval_all and 'src' in domobj.__dict__:
            location = self.__dict__['__window'].document.location
            src = location.fix_url(domobj.src)
            # NOTE(review): the fetched content is not used in this method;
            # presumably the request is made for its side effects -- confirm.
            script, headers = hc.get(src, location.href)

        try:
            # Lowercase the document once and search from an offset instead
            # of lowercasing and slicing it for every lookup.
            lowered = self.html.lower()
            tag_open = lowered.index('<' + tag, self.current)
            # Advance the cursor to just past the '>' closing this start tag.
            self.current = lowered.index('>', tag_open) + 1
            domobj.__dict__['begin'] = self.current
            domobj.__dict__['end'] = lowered.index('</' + tag, self.current)

            if (tag == 'div' and attrs) or tag == 'body':
                domobj.innerHTML = self.html[
                    domobj.__dict__['begin']:domobj.__dict__['end']]
        except Exception:
            # Best effort: the tag text or its closing tag may be missing
            # from the raw HTML. Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

        self.DOM_stack[-1].appendChild(domobj)
        self.DOM_stack.append(domobj)

        if tag == 'form':
            self.__dict__['__window'].__dict__['__fl'].append(domobj)
        if tag in ('br', 'meta'):
            self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
        if tag == 'select':
            self.lastselect = domobj
        if tag == 'option':
            try:
                self.lastselect.options.append(domobj)
            except Exception:
                # An <option> with no usable preceding <select> is ignored.
                pass
Example #4
0
    def unknown_starttag(self, tag, attrs):
        """Create a DOM node for an opening tag and push it on the DOM stack.

        While ``self.in_Script`` is set the raw tag text is forwarded to
        ``handle_data`` and nothing else happens. The method also returns
        early, after logging the tag, when ``self.endearly`` is set.

        Otherwise a ``DOMObject`` is built for ``tag``, its attributes are set
        (names translated by ``dataetc.attrTrans``), attributes that
        ``dataetc.isevent`` reports as events are handed to
        ``self.emulate_timeout``, and the node's ``begin``/``end`` offsets
        into ``self.html`` are recorded on a best-effort basis. The node is
        appended to the current top of ``self.DOM_stack`` and becomes the new
        top.

        Args:
            tag: Tag name; it is searched for in the lowercased HTML, so it
                is assumed to be lowercase already -- confirm with the caller.
            attrs: Iterable of ``(name, value)`` attribute pairs.
        """
        if self.in_Script:
            self.handle_data(self.get_starttag_text())
            return

        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] [PageParser.py] Tag: " + tag)
        if self.endearly:
            return

        domobj = DOMObject(self.__dict__['__window'], tag, self)
        # Sometimes the key in a tag is not really the attribute name, so a
        # transform is needed. Note that this is the IE way; in Firefox the
        # transform is done in DOMObject.setAttribute().
        for name, value in attrs:
            domobj.setAttribute(dataetc.attrTrans(name, tag), value)
            if dataetc.isevent(name.lower(), tag):
                self.emulate_timeout(name, value)

        if tag == 'script':
            domobj.__dict__['script'] = ''

        if config.retrieval_all and 'src' in domobj.__dict__:
            location = self.__dict__['__window'].document.location
            src = location.fix_url(domobj.src)
            # NOTE(review): the fetched content is not used in this method;
            # presumably the request is made for its side effects -- confirm.
            script, headers = hc.get(src, location.href)

        try:
            # Lowercase the document once and search from an offset instead
            # of lowercasing and slicing it for every lookup.
            lowered = self.html.lower()
            tag_open = lowered.index('<' + tag, self.current)
            # Advance the cursor to just past the '>' closing this start tag.
            self.current = lowered.index('>', tag_open) + 1
            domobj.__dict__['begin'] = self.current
            domobj.__dict__['end'] = lowered.index('</' + tag, self.current)

            if (tag == 'div' and attrs) or tag == 'body':
                domobj.innerHTML = self.html[
                    domobj.__dict__['begin']:domobj.__dict__['end']]
        except Exception:
            # Best effort: the tag text or its closing tag may be missing
            # from the raw HTML. Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

        self.DOM_stack[-1].appendChild(domobj)
        self.DOM_stack.append(domobj)

        if tag == 'form':
            self.__dict__['__window'].__dict__['__fl'].append(domobj)
        if tag in ('br', 'meta'):
            self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
        if tag == 'select':
            self.lastselect = domobj
        if tag == 'option':
            try:
                self.lastselect.options.append(domobj)
            except Exception:
                # An <option> with no usable preceding <select> is ignored.
                pass