Ejemplo n.º 1
0
    def _cleanup_html(self, response):
        """
        Record the body and URL of `response`, optionally clean the HTML,
        and return a fresh response wrapping the cleaned body.

        The raw body is kept in self._orig_html, the URL in self._url and
        the (possibly cleaned) body in self._html.  Cleaning is driven by
        two twill options: 'use_tidy' runs run_tidy() and 'strip_scripts'
        runs run_strip_scripts().

        The returned object is built by mechanize.make_response() from the
        cleaned HTML plus the headers, URL, code and msg of `response`.
        """
        # Read the whole body from the start, then rewind again so that
        # `response` can still be read from the beginning afterwards.
        response.seek(0)
        raw_html = response.read()
        self._orig_html = raw_html
        self._url = response.geturl()
        response.seek(0)

        self._html = raw_html

        from twill.commands import _options

        if _options.get('use_tidy'):
            # Only the cleaned output is used here; tidy's error text is
            # ignored.  An empty/None result leaves the HTML untouched.
            tidied, _tidy_errors = run_tidy(self._html)
            if tidied:
                self._html = tidied

        if _options.get('strip_scripts'):
            self._html = run_strip_scripts(self._html)

        header_items = response._headers.items()
        return mechanize.make_response(self._html, header_items,
                                       response._url, response.code,
                                       response.msg)
Ejemplo n.º 2
0
    def _cleanup_html(self, response):
        """
        Record the body and URL of `response`, optionally run tidy on the
        HTML, and return a fresh response wrapping the resulting body.

        The raw body is kept in self._orig_html, the URL in self._url and
        the (possibly tidied) body in self._html.  Tidying only happens
        when the twill option 'use_tidy' is set.

        The returned object is built by mechanize.make_response() from the
        resulting HTML plus the headers, URL, code and msg of `response`.
        """
        # Read the whole body from the start, then rewind again so that
        # `response` can still be read from the beginning afterwards.
        response.seek(0)
        raw_html = response.read()
        self._orig_html = raw_html
        self._url = response.geturl()
        response.seek(0)

        self._html = raw_html

        from twill.commands import _options

        if _options.get('use_tidy'):
            # Only the cleaned output is used here; tidy's error text is
            # ignored.  An empty/None result leaves the HTML untouched.
            tidied, _tidy_errors = run_tidy(self._html)
            if tidied:
                self._html = tidied

        header_items = response._headers.items()
        return mechanize.make_response(self._html, header_items,
                                       response._url, response.code,
                                       response.msg)
Ejemplo n.º 3
0
def run_tidy(html):
    """
    Run the 'tidy' command-line program on the given HTML string.

    The HTML is written to the stdin of the command held in the
    module-level _tidy_cmd; its stdout and stderr are captured.

    Return a 2-tuple (output, errors), where 'output' is what tidy wrote
    to stdout and 'errors' is what it wrote to stderr.  (None, None) will
    be returned if 'tidy' doesn't exist or otherwise fails.

    Raises TwillException if the 'require_tidy' option is set and tidy is
    unavailable or could not be run.
    """
    global _tidy_cmd, _tidy_exists

    from commands import _options
    require_tidy = _options.get('require_tidy')

    if not _tidy_exists:
        if require_tidy:
            raise TwillException("tidy does not exist and require_tidy is set")
        return (None, None)

    #
    # run the command, if we think it exists
    #

    # Both results default to None *before* the attempt, so that a
    # successful run's stderr is actually returned to the caller instead
    # of being reset afterwards.
    clean_html = None
    errors = None
    try:
        process = subprocess.Popen(_tidy_cmd,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   bufsize=0,
                                   shell=False)

        (stdout, stderr) = process.communicate(html)

        clean_html = stdout
        errors = stderr
    except OSError:
        # The command could not be run; remember that so later calls
        # take the early-return path above.
        _tidy_exists = False

    if require_tidy and clean_html is None:
        raise TwillException("tidy does not exist and require_tidy is set")

    return (clean_html, errors)
Ejemplo n.º 4
0
    def http_response(self, request, response):
        """
        Act on a "refresh" header carried by a 200 response.

        When the twill option 'acknowledge_equiv_refresh' is set and the
        header has the form "<pause>;<url spec>", the target URL is stored
        in the "location" header and the response is replaced by the
        result of self.parent.error("http", request, response, "refresh",
        msg, hdrs), provided self.max_time is None or the pause does not
        exceed it.  In every other case `response` is returned unchanged.

        Progress (and, if _debug_print_refresh is set, debug output) is
        printed to twill's OUT stream.
        """
        from twill.commands import OUT, _options
        follow_refresh = _options.get('acknowledge_equiv_refresh')

        status, reason, headers = response.code, response.msg, response.info()

        # Nothing to do unless this is a 200 with a refresh header and the
        # option is switched on.
        wants_refresh = (status == 200 and headers.has_key("refresh")
                         and follow_refresh)
        if not wants_refresh:
            return response

        refresh_value = headers.getheaders("refresh")[0]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: code 200, hdrs has 'refresh'"
            print>>OUT, "equiv-refresh DEBUG: refresh header is", refresh_value

        # The header is only acted on when it contains a ";" separator.
        semicolon = refresh_value.find(";")
        if semicolon == -1:
            return response

        pause = int(refresh_value[:semicolon])
        url_spec = refresh_value[semicolon + 1:]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: pause:", pause
            print>>OUT, "equiv-refresh DEBUG: new url:", url_spec

        # Keep what follows the first "=".  When there is no "=", find()
        # yields -1, the slice starts at 0 and the whole spec is kept.
        target_url = url_spec[url_spec.find("=") + 1:]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: final url:", target_url

        print>>OUT, "Following HTTP-EQUIV=REFRESH to %s" % (target_url,)

        if (self.max_time is None) or (pause <= self.max_time):
            # The sleep is deliberately disabled by the trailing "and 0".
            if pause != 0 and 0:  # CTB hack! ==#  and self.honor_time:
                time.sleep(pause)
            headers["location"] = target_url
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response,
                "refresh", reason, headers)

        return response
Ejemplo n.º 5
0
def run_tidy(html):
    """
    Run the 'tidy' command-line program on the given HTML string.

    The command line is taken from the module-level _tidy_cmd, which is
    filled in from config.tidy_cmd on first use.  The HTML is written to
    the command's stdin; its stdout and stderr are captured.

    Return a 2-tuple (output, errors), where 'output' is what tidy wrote
    to stdout and 'errors' is what it wrote to stderr.  (None, None) will
    be returned if 'tidy' doesn't exist or otherwise fails.

    Raises TwillException if the 'require_tidy' option is set and tidy is
    unavailable or could not be run.
    """
    global _tidy_cmd, _tidy_exists

    from commands import _options
    require_tidy = _options.get('require_tidy')

    if not _tidy_exists:
        if require_tidy:
            raise TwillException("tidy does not exist and require_tidy is set")
        return (None, None)

    #
    # run the command, if we think it exists
    #

    # Both results default to None *before* the attempt, so that a
    # successful run's stderr is actually returned to the caller instead
    # of being reset afterwards.
    clean_html = None
    errors = None
    try:
        if not _tidy_cmd:
            _tidy_cmd = config.tidy_cmd.split()
        process = subprocess.Popen(_tidy_cmd, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, bufsize=0,
                                   shell=False)

        (stdout, stderr) = process.communicate(html)

        clean_html = stdout
        errors = stderr
    except OSError:
        # The command could not be run; remember that so later calls
        # take the early-return path above.
        _tidy_exists = False

    if require_tidy and clean_html is None:
        raise TwillException("tidy does not exist and require_tidy is set")

    return (clean_html, errors)
Ejemplo n.º 6
0
    def http_response(self, request, response):
        """
        Act on a "refresh" header carried by a 200 response.

        When the twill option 'acknowledge_equiv_refresh' is set and the
        header has the form "<pause>;<url spec>", the target URL is stored
        in the "location" header and the response is replaced by the
        result of self.parent.error("http", request, response, "refresh",
        msg, hdrs), provided self.max_time is None or the pause does not
        exceed it.  In every other case `response` is returned unchanged.

        Progress (and, if _debug_print_refresh is set, debug output) is
        printed to twill's OUT stream.
        """
        from twill.commands import OUT, _options
        follow_refresh = _options.get('acknowledge_equiv_refresh')

        status, reason, headers = response.code, response.msg, response.info()

        # Nothing to do unless this is a 200 with a refresh header and the
        # option is switched on.
        wants_refresh = (status == 200 and headers.has_key("refresh")
                         and follow_refresh)
        if not wants_refresh:
            return response

        refresh_value = headers.getheaders("refresh")[0]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: code 200, hdrs has 'refresh'"
            print>>OUT, "equiv-refresh DEBUG: refresh header is", refresh_value

        # The header is only acted on when it contains a ";" separator.
        semicolon = refresh_value.find(";")
        if semicolon == -1:
            return response

        pause = int(refresh_value[:semicolon])
        url_spec = refresh_value[semicolon + 1:]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: pause:", pause
            print>>OUT, "equiv-refresh DEBUG: new url:", url_spec

        # Keep what follows the first "=".  When there is no "=", find()
        # yields -1, the slice starts at 0 and the whole spec is kept.
        target_url = url_spec[url_spec.find("=") + 1:]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: final url:", target_url

        print>>OUT, "Following HTTP-EQUIV=REFRESH to %s" % (target_url,)

        if (self.max_time is None) or (pause <= self.max_time):
            # The sleep is deliberately disabled by the trailing "and 0".
            if pause != 0 and 0:  # CTB hack! ==#  and self.honor_time:
                time.sleep(pause)
            headers["location"] = target_url
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response,
                "refresh", reason, headers)

        return response
Ejemplo n.º 7
0
    def use_BS(self):
        """
        Return the current value of twill's 'use_BeautifulSoup' option.
        """
        # Imported at call time rather than at module load.
        from twill.commands import _options
        return _options.get('use_BeautifulSoup')
Ejemplo n.º 8
0
def _follow_equiv_refresh():
    """
    Return the current value of twill's 'acknowledge_equiv_refresh' option.
    """
    # Imported at call time rather than at module load.
    from twill.commands import _options
    should_follow = _options.get('acknowledge_equiv_refresh')
    return should_follow
Ejemplo n.º 9
0
    def use_BS(self):
        """
        Return the current value of twill's 'use_BeautifulSoup' option.
        """
        # Imported at call time rather than at module load.
        from twill.commands import _options
        return _options.get("use_BeautifulSoup")
Ejemplo n.º 10
0
def _follow_equiv_refresh():
    """
    Return the current value of the 'acknowledge_equiv_refresh' option.
    """
    # The option table is read from twill.commands, the module the other
    # option lookups in this code use; the previous 'twine.commands' import
    # pointed at a different package.
    # NOTE(review): confirm this code is not part of a fork that really
    # ships its own 'twine.commands' module.
    from twill.commands import _options
    return _options.get('acknowledge_equiv_refresh')