Ejemplo n.º 1
0
    def unknown_starttag(self, tag, attrs):
        """
        Handle an opening tag: build a DOMObject for it and push it on the DOM stack.

        Parameters

        tag is the tag name. It is searched for in the lowercased document
        text, so it is presumably already lowercase (TODO confirm against the
        parser base class).

        attrs is an iterable of (name, value) attribute pairs.

        Side effects

        Advances self.current past the tag, appends the new node to the node
        on top of self.DOM_stack and pushes it, and may register the node in
        the window's '__fl' list (forms) or in self.lastselect (selects).
        """
        if self.in_Script:
            # Inside a script the tag is not parsed; its raw text is forwarded
            # as character data instead.
            self.handle_data(self.get_starttag_text())
            return

        config.VERBOSE(config.VERBOSE_DEBUG,
                       "[DEBUG] [PageParser.py] Tag: " + tag)
        if self.endearly:
            return

        domobj = DOMObject(self.__dict__['__window'], tag, self)
        # Sometimes the attribute name given in the tag is not the real
        # attribute name, so a transform is needed. Note that this is the IE
        # way; in Firefox the transform is done in DOMObject.setAttribute().
        for name, value in attrs:
            domobj.setAttribute(dataetc.attrTrans(name, tag), value)
            if dataetc.isevent(name.lower(), tag):
                self.emulate_timeout(name, value)

        if tag == 'script':
            domobj.__dict__['script'] = ''

        if config.retrieval_all and 'src' in domobj.__dict__:
            src = self.__dict__['__window'].document.location.fix_url(
                domobj.src)
            # The response is not used in this method; the request itself is
            # kept because retrieval_all asks for every resource to be
            # fetched (presumably for hc's own bookkeeping -- TODO confirm).
            hc.get(src, self.__dict__['__window'].document.location.href)

        try:
            # Lowercase the document once instead of once per lookup.
            lowered = self.html.lower()
            begin = lowered[self.current:].index('<' + tag)
            start = self.current + begin
            offset = begin + lowered[start:].index('>') + 1

            # self.current and 'begin' are updated before the closing tag is
            # searched for, so they stay set even when no closing tag exists.
            self.current += offset
            content_start = self.current
            domobj.__dict__['begin'] = content_start
            content_end = content_start + lowered[content_start:].index(
                '</' + tag)
            domobj.__dict__['end'] = content_end

            if (tag == 'div' and attrs) or tag == 'body':
                domobj.innerHTML = self.html[content_start:content_end]
        except Exception:
            # Best effort: a tag that cannot be located in the raw text (or
            # has no closing tag) simply gets no 'end'/innerHTML. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

        self.DOM_stack[-1].appendChild(domobj)
        self.DOM_stack.append(domobj)

        if tag == 'form':
            self.__dict__['__window'].__dict__['__fl'].append(domobj)
        if tag in ('br', 'meta'):
            self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
        if tag == 'select':
            self.lastselect = domobj
        if tag == 'option':
            try:
                self.lastselect.options.append(domobj)
            except Exception:
                # An <option> with no usable preceding <select> is ignored.
                pass
Ejemplo n.º 2
0
    def unknown_starttag(self, tag, attrs):
        """
        Handle an opening tag: build a DOMObject for it and push it on the DOM stack.

        Parameters

        tag is the tag name. It is searched for in the lowercased document
        text, so it is presumably already lowercase (TODO confirm against the
        parser base class).

        attrs is an iterable of (name, value) attribute pairs.

        Side effects

        Advances self.current past the tag, appends the new node to the node
        on top of self.DOM_stack and pushes it, and may register the node in
        the window's '__fl' list (forms) or in self.lastselect (selects).
        """
        if self.in_Script:
            # Inside a script the tag is not parsed; its raw text is forwarded
            # as character data instead.
            self.handle_data(self.get_starttag_text())
            return

        config.VERBOSE(config.VERBOSE_DEBUG, "[DEBUG] [PageParser.py] Tag: " + tag)
        if self.endearly:
            return

        domobj = DOMObject(self.__dict__['__window'], tag, self)
        # Sometimes the attribute name given in the tag is not the real
        # attribute name, so a transform is needed. Note that this is the IE
        # way; in Firefox the transform is done in DOMObject.setAttribute().
        for name, value in attrs:
            domobj.setAttribute(dataetc.attrTrans(name, tag), value)
            if dataetc.isevent(name.lower(), tag):
                self.emulate_timeout(name, value)

        if tag == 'script':
            domobj.__dict__['script'] = ''

        if config.retrieval_all and 'src' in domobj.__dict__:
            src = self.__dict__['__window'].document.location.fix_url(domobj.src)
            # The response is not used in this method; the request itself is
            # kept because retrieval_all asks for every resource to be
            # fetched (presumably for hc's own bookkeeping -- TODO confirm).
            hc.get(src, self.__dict__['__window'].document.location.href)

        try:
            # Lowercase the document once instead of once per lookup.
            lowered = self.html.lower()
            begin = lowered[self.current:].index('<' + tag)
            start = self.current + begin
            offset = begin + lowered[start:].index('>') + 1

            # self.current and 'begin' are updated before the closing tag is
            # searched for, so they stay set even when no closing tag exists.
            self.current += offset
            content_start = self.current
            domobj.__dict__['begin'] = content_start
            content_end = content_start + lowered[content_start:].index('</' + tag)
            domobj.__dict__['end'] = content_end

            if (tag == 'div' and attrs) or tag == 'body':
                domobj.innerHTML = self.html[content_start:content_end]
        except Exception:
            # Best effort: a tag that cannot be located in the raw text (or
            # has no closing tag) simply gets no 'end'/innerHTML. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

        self.DOM_stack[-1].appendChild(domobj)
        self.DOM_stack.append(domobj)

        if tag == 'form':
            self.__dict__['__window'].__dict__['__fl'].append(domobj)
        if tag in ('br', 'meta'):
            self.unknown_endtag(tag)  # <br> and <meta> have no end tag.
        if tag == 'select':
            self.lastselect = domobj
        if tag == 'option':
            try:
                self.lastselect.options.append(domobj)
            except Exception:
                # An <option> with no usable preceding <select> is ignored.
                pass
Ejemplo n.º 3
0
    def createElement(self, type):
        """
        Build a new element for the given tag name and hand it back.

        Syntax

        element = element.createElement(type)

        Parameters

        type is a string naming the kind of element to create; it is passed
        to DOMObject as the tag, together with self.contentWindow as the
        window and None as the parser.

        Returns

        The last entry of self.all, read after the DOMObject has been built.
        """
        # The constructor's result is intentionally not kept: the element is
        # taken from self.all instead, which presumably gains the new object
        # as a side effect of construction -- TODO confirm in DOMObject.
        DOMObject(self.contentWindow, type, None)
        known_elements = self.all
        return known_elements[-1]
Ejemplo n.º 4
0
 def __init__(self):
     """
     Seed the instance from self.inits, then run the DOMObject initializer.

     The entries of self.inits are copied into the instance dictionary
     first, so window, tagName and parser may already come from there by
     the time they are handed to DOMObject.__init__.
     """
     vars(self).update(self.inits)
     window, tag_name, parser = self.window, self.tagName, self.parser
     DOMObject.__init__(self, window, tag_name, parser)
Ejemplo n.º 5
0
 def __init__(self):
     """
     Seed the instance from self.inits, then run the DOMObject initializer.

     The entries of self.inits are copied into the instance dictionary
     first, so window, tagName and parser may already come from there by
     the time they are handed to DOMObject.__init__.
     """
     vars(self).update(self.inits)
     window, tag_name, parser = self.window, self.tagName, self.parser
     DOMObject.__init__(self, window, tag_name, parser)