# Example #1
# 0
 def start_map(self, attrs):
     """Begin a client-side image map for a <MAP> tag.

     A MAP without a NAME attribute is ignored and the document is
     flagged as bad HTML.
     """
     if not attrs.has_key('name'):
         self.badhtml = 1
         return
     from ImageMap import MapInfo
     map_name = attrs['name']
     self.current_map = MapInfo(map_name)
 def start_map(self, attrs):
     """Begin a client-side image map for a <MAP> tag.

     A MAP without a NAME attribute is ignored and the document is
     flagged as bad HTML.
     """
     if not attrs.has_key('name'):
         self.badhtml = 1
         return
     from ImageMap import MapInfo
     map_name = attrs['name']
     self.current_map = MapInfo(map_name)
# Example #3
# 0
class GrailHTMLParser(HTMLParser):

    # NOTE(review): this list is not read anywhere in the visible code;
    # presumably the HTMLParser base class consults it to decide which
    # tags are routed to an embedded object -- confirm against the base.
    object_aware_tags = ['param', 'a', 'alias', 'applet', 'script', 'object']

    def __init__(self, viewer, reload=0):
        """Set up a parser that renders into `viewer'.

        viewer -- object the formatter writes to; its `context' attribute
                  supplies the browsing context and, through it, the app.
        reload -- true value when the document is being reloaded.
        """
        global _inited
        self.viewer = viewer
        self.reload = reload
        self.context = self.viewer.context
        self.app = self.context.app
        self.load_dingbat = self.app.load_dingbat
        self.loaded = []
        self.current_map = None
        self.target = None
        self.formatter_stack = []
        fmt = formatter.AbstractFormatter(self.viewer)
        HTMLParser.__init__(self, fmt)
        # Only push after the base class is initialized: set_formatter()
        # reads self.nofill and calls self.set_data_handler().
        self.push_formatter(fmt)
        # One-time module initialization, guarded by the module-level flag.
        if not _inited:
            _inited = 1
            init_module(self.app.prefs)
        # Names already registered through register_id().
        self._ids = {}
        # Hackery so reload status can be reset when all applets are loaded
        import AppletLoader
        self.reload1 = self.reload and AppletLoader.set_reload(self.context)
        if self.reload1:
            self.reload1.attach(self)
        # The 'strict' preference maps to restrict(0), matching do_meta().
        if self.app.prefs.GetBoolean('parsing-html', 'strict'):
            self.sgml_parser.restrict(0)
        # Information from <META ... CONTENT="..."> is collected here.
        # Entries are KEY --> [(NAME, HTTP-EQUIV, CONTENT), ...], where
        # KEY is (NAME or HTTP-EQUIV).
        self._metadata = {}

    def close(self):
        """Finish parsing, detach from the reload tracker, start any refresh.

        A "refresh" value is taken from the first matching <META> entry,
        or failing that from the context's headers; when one is found a
        DynamicReloader is created for it.
        """
        HTMLParser.close(self)
        tracker = self.reload1
        if tracker:
            tracker.detach(self)
        self.reload1 = None
        refresh = None
        meta = self._metadata
        if meta.has_key("refresh"):
            first_entry = meta["refresh"][0]
            _name, _http_equiv, refresh = first_entry
        else:
            if self.context.get_headers().has_key("refresh"):
                refresh = self.context.get_headers()["refresh"]
        if not refresh:
            return
        DynamicReloader(self.context, refresh)

    # manage the formatter stack
    def get_formatter(self):
        """Return the formatter on top of the formatter stack."""
        stack = self.formatter_stack
        return stack[-1]

    def push_formatter(self, formatter):
        """Push `formatter' on the stack and make it the active formatter."""
        stack = self.formatter_stack
        stack.append(formatter)
        self.set_formatter(formatter)

    def pop_formatter(self):
        """Drop the top formatter and reactivate the one beneath it."""
        stack = self.formatter_stack
        del stack[-1]
        previous = stack[-1]
        self.set_formatter(previous)

    def set_formatter(self, formatter):
        """Make `formatter' current and rebind viewer, context and data handler.

        The data handler is the formatter's literal-data method while
        self.nofill is true, and its flowing-data method otherwise.
        """
        self.formatter = formatter      # attribute used by the base class
        writer = formatter.writer
        self.viewer = writer
        self.context = writer.context
        if self.nofill:
            handler = formatter.add_literal_data
        else:
            handler = formatter.add_flowing_data
        self.set_data_handler(handler)

    # Override HTMLParser internal methods

    def get_devicetypes(self):
        """Return the sequence of device type names for this parser."""
        device_types = ('viewer', 'writer')
        return device_types

    def register_id(self, id):
        """Record `id' as a target in the viewer.

        Returns 1 when the id is new.  A duplicate id flags the document
        as bad HTML and returns 0 without touching the viewer.
        """
        seen = self._ids
        if not seen.has_key(id):
            seen[id] = id
            self.viewer.add_target('#' + id)
            return 1
        self.badhtml = 1
        return 0

    def anchor_bgn(self, href, name, type, target="", id=None):
        """Open an anchor: remember href/target and push its four style tags.

        The pushed styles are, in order: the anchor tag ('a', or 'ahist'
        when the URL is in the global history), the URL tag, the '#name'
        tag and the '#id' tag.  Each is None when it does not apply.
        """
        self.anchor = href
        self.target = target
        atag = utag = idtag = nametag = None
        if href:
            if target:
                utag = '>%s%s%s' % (href, Viewer.TARGET_SEPARATOR, target)
            else:
                utag = '>' + href
            self.viewer.bind_anchors(utag)
            history = self.app.global_history
            if history.inhistory_p(self.context.get_baseurl(href)):
                atag = 'ahist'
            else:
                atag = 'a'
        if id and self.register_id(id):
            idtag = '#' + id
        if name and self.register_id(name):
            nametag = '#' + name
        self.formatter.push_style(atag, utag, nametag, idtag)

    def anchor_end(self):
        """Close the anchor: pop the four styles pushed by anchor_bgn()."""
        self.formatter.pop_style(4)
        self.anchor = None
        self.target = None

    def do_hr(self, attrs):
        """Handle <HR>, honoring the SRC, COLOR, NOSHADE and SIZE attributes.

        With a SRC attribute (and image loading enabled on the app) the
        rule is drawn as an aligned image followed by a line break.
        Otherwise the base class handles the tag, and the last entry of
        self.viewer.rules (presumably the rule it just created -- confirm)
        is recolored and/or flattened as requested.
        """
        if attrs.has_key('src') and self.app.load_images:
            align = extract_keyword(
                'align',
                attrs,
                default='center',
                conv=lambda s, gu=grailutil: gu.conv_enumeration(
                    gu.conv_normstring(s), ['left', 'center', 'right']))
            self.implied_end_p()
            self.formatter.push_alignment(align)
            self.do_img({'border': '0', 'src': attrs['src']})
            self.formatter.pop_alignment()
            self.formatter.add_line_break()
            return
        HTMLParser.do_hr(self, attrs)
        color = extract_keyword('color', attrs)
        rules = self.viewer.rules
        if not rules:
            # No rule widget to decorate.  The list used to be indexed
            # before it was checked for emptiness, raising IndexError.
            return
        rule = rules[-1]
        if attrs.has_key('noshade'):
            if color:
                if not self.configcolor('background', color, widget=rule):
                    # Requested color was rejected; use the text color.
                    self.configcolor('background',
                                     self.viewer.text['foreground'],
                                     widget=rule)
            else:
                # this color is known to work already
                rule.config(background=self.viewer.text['foreground'])
            rule.config(relief=FLAT)
            size = extract_keyword('size',
                                   attrs,
                                   2,
                                   conv=grailutil.conv_integer)
            if size == 1:
                # could not actually set it to 1 unless it was flat; do it now:
                width = string.atoi(rule.cget('width'))
                rule.config(borderwidth=0, height=1, width=width + 2)
        elif color:
            self.configcolor('background', color, widget=rule)

    # Duplicated from htmllib.py because we want to have the border attribute
    def do_img(self, attrs):
        """Handle <IMG>: collect its attributes and call handle_image().

        Duplicated from htmllib so that BORDER is honored.  When no BORDER
        is given, images inside an anchor, ISMAP images and USEMAP images
        get a border of 2; all others get 0.
        """
        extract = extract_keyword
        align = BASELINE
        ## align = extract('align', attrs, align, conv=conv_align)
        alt = extract('alt', attrs, '(image)')
        border = extract('border',
                         attrs,
                         self.anchor and 2 or None,
                         conv=string.atoi)
        ismap = attrs.has_key('ismap')
        if ismap and border is None:
            border = 2
        src = extract('src', attrs, '')
        width = extract('width', attrs, 0, conv=string.atoi)
        height = extract('height', attrs, 0, conv=string.atoi)
        hspace = extract('hspace', attrs, 0, conv=string.atoi)
        vspace = extract('vspace', attrs, 0, conv=string.atoi)
        usemap = extract('usemap', attrs, conv=string.strip)
        if usemap:
            # NOTE(review): the previous code stripped a leading '#' into a
            # local that was never used, so the map name has always been
            # passed on unchanged.  That behavior is kept; confirm whether
            # MapThunk expects the name with or without the '#'.
            from ImageMap import MapThunk
            usemap = MapThunk(self.context, usemap)
            if border is None:
                border = 2
        self.handle_image(src,
                          alt,
                          usemap,
                          ismap,
                          align,
                          width,
                          height,
                          border or 0,
                          self.reload1,
                          hspace=hspace,
                          vspace=vspace)

    def handle_image(self, src, alt, usemap, ismap, align, width, height,
                     border=2, reload=0, hspace=0, vspace=0):
        """Insert an image window, or its ALT text when images are disabled.

        The "browser"/"load-images" preference decides between the two.
        """
        if self.app.prefs.GetBoolean("browser", "load-images"):
            from ImageWindow import ImageWindow
            window = ImageWindow(self.viewer, self.anchor, src,
                                 alt or "(Image)", usemap, ismap, align,
                                 width, height, border, self.target, reload)
            self.add_subwindow(window,
                               align=align, hspace=hspace, vspace=vspace)
        else:
            self.handle_data(alt)

    def add_subwindow(self, w, align=CENTER, hspace=0, vspace=0):
        """Embed widget `w' in the viewer at the current position.

        hspace and vspace are accepted but currently unused; the padding
        code below is disabled.
        """
        fmt = self.formatter
        fmt.flush_softspace()
        if fmt.nospace:
            # XXX Disgusting hack to tag the first character of the line
            # so things like indents and centering work
            self.viewer.prepare_for_insertion()
        self.viewer.add_subwindow(w, align=align)
        ##      if hspace or vspace:
        ##          self.viewer.text.window_config(w, padx=hspace, pady=vspace)
        fmt.assert_line_data()

    # Extend tag: </TITLE>

    def end_title(self):
        """Handle </TITLE>: pass the title to the context.

        A title that ends outside the document head flags bad HTML.
        """
        HTMLParser.end_title(self)
        self.context.set_title(self.title)
        if self.inhead:
            return
        self.badhtml = 1

    # Override tag: <BODY colorspecs...>

    def start_body(self, attrs):
        """Handle <BODY>, applying its color attributes when allowed.

        Nothing beyond the base-class handling happens unless the
        'parsing-html'/'honor-colors' preference is set.
        """
        HTMLParser.start_body(self, attrs)
        if not self.app.prefs.GetBoolean('parsing-html', 'honor-colors'):
            return
        from grailutil import conv_normstring
        background = extract_keyword('bgcolor', attrs, conv=conv_normstring)
        if background:
            used = self.configcolor('background', background)
            if used:
                #  Normally not important, but ISINDEX would cause
                #  these to be non-empty, as would all sorts of illegal stuff:
                for widget in self.viewer.rules + self.viewer.subwindows:
                    widget.config(highlightbackground=used)
        # (attribute name, text tag) pairs, applied in document order.
        foreground_specs = (('text', None),
                            ('link', 'a'),
                            ('vlink', 'ahist'),
                            ('alink', 'atemp'))
        for attr_name, tag in foreground_specs:
            value = extract_keyword(attr_name, attrs, conv=conv_normstring)
            self.configcolor('foreground', value, tag)

    # These are defined by the HTML 3.2 (Wilbur) version of HTML.
    # Maps each standard color name to its "#rrggbb" value.  configcolor()
    # consults this table only after the name and '#' + name both fail.
    _std_colors = {
        "black": "#000000",
        "silver": "#c0c0c0",
        "gray": "#808080",
        "white": "#ffffff",
        "maroon": "#800000",
        "red": "#ff0000",
        "purple": "#800080",
        "fuchsia": "#ff00ff",
        "green": "#008000",
        "lime": "#00ff00",
        "olive": "#808000",
        "yellow": "#ffff00",
        "navy": "#000080",
        "blue": "#0000ff",
        "teal": "#008080",
        "aqua": "#00ffff",
    }

    def configcolor(self, option, color, tag=None, widget=None):
        """Set a color option, returning the color that was actually used.

        The color is tried as given, then with a leading '#' added, then
        through the standard color-name table.  `widget' defaults to the
        viewer's text widget.  If no color was set, the false result of
        the last attempt (or None for an empty color) is returned.
        """
        if not color:
            return None
        target = widget or self.viewer.text
        applied = try_configcolor(option, color, tag, target)
        if applied:
            return applied
        if color[0] != '#':
            applied = try_configcolor(option, '#' + color, tag, target)
            if applied:
                return applied
        if self._std_colors.has_key(color):
            standard = self._std_colors[color]
            applied = try_configcolor(option, standard, tag, target)
        return applied

    # Override tag: <BASE HREF=...>

    def do_base(self, attrs):
        """Handle <BASE>: pass HREF and TARGET (None when absent) to the context."""
        href = attrs.get('href')
        frame = attrs.get('target')
        self.context.set_baseurl(href, frame)

    # Override tag: <META ...>

    def do_meta(self, attrs):
        """Handle <META>: record the entry and apply grail:parse-mode.

        CONTENT is required, together with at least one of HTTP-EQUIV or
        NAME; anything else flags the document as bad HTML.  Entries are
        stored in self._metadata under (NAME or HTTP-EQUIV).
        """
        has = attrs.has_key
        if not has("content") or not (has("http-equiv") or has("name")):
            self.badhtml = 1
            return
        normalize = grailutil.conv_normstring
        name = extract_keyword("name", attrs, conv=normalize)
        http_equiv = extract_keyword("http-equiv", attrs, conv=normalize)
        key = name or http_equiv
        if not key:
            self.badhtml = 1
            return
        content = extract_keyword("content", attrs, conv=string.strip)
        item = (name, http_equiv, content)
        metadata = self._metadata
        if metadata.has_key(key):
            metadata[key].append(item)
        else:
            metadata[key] = [item]
        if key != "grail:parse-mode":
            return
        # The document may switch the SGML parser between strict and
        # forgiving mode on request.
        mode = grailutil.conv_normstring(content)
        strict = self.sgml_parser.strict_p()
        if mode == "strict" and not strict:
            self.sgml_parser.restrict(0)
            self.context.message("Entered strict parsing mode on"
                                 " document request.")
        elif mode == "forgiving" and strict:
            self.sgml_parser.restrict(1)
            self.context.message("Exited strict parsing mode on"
                                 " document request.")

    # Duplicated from htmllib.py because we want to have the target attribute
    def start_a(self, attrs):
        """Handle <A>, with TARGET, ID, URN and TITLE support.

        While an embedded object is active the anchor is handed to it.
        A URN with a doi, hdl or ietf scheme takes precedence over HREF.
        A TITLE is stored in the global history only when the URL has no
        title there yet.
        """
        if self.get_object():  # expensive!
            self.get_object().anchor(attrs)
            return
        has_key = attrs.has_key
        href = string.strip(attrs.get("urn", ""))
        scheme, resturl = urllib.splittype(href)
        if scheme == "urn":
            scheme, resturl = urllib.splittype(resturl)
        if scheme not in ("doi", "hdl", "ietf"):
            # this is an unknown URN scheme or there wasn't a URN
            href = string.strip(attrs.get("href", ""))
        name = extract_keyword('name', attrs, conv=grailutil.conv_normstring)
        type = ''
        if has_key('type'):
            type = string.lower(attrs['type'] or '')
        target = ''
        if has_key('target'):
            target = attrs['target']
        id = None
        if has_key('id'):
            id = attrs['id']
        self.anchor_bgn(href, name, type, target, id)
        # Delay this at least a little, since we don't want to add the title
        # to the history until the last possible moment.  We need a non-history
        # way to do this; a resources database would be much better.
        if not has_key('title'):
            return
        title = string.join(string.split(attrs['title'] or ''))
        if not title:
            return
        compact_href = string.joinfields(string.split(href), '')
        url = self.context.get_baseurl(compact_href)
        history = self.app.global_history
        old_title, when = history.lookup_url(url)
        if not old_title:
            # Only do this if there's not already a title in the
            # history.  If the URL wasn't in the history, it will
            # be given a timestamp, which is bad. ;-(
            history.set_title(url, title)

    # New tag: <MAP> (for client side image maps)

    def start_map(self, attrs):
        """Begin a client-side image map for a <MAP> tag.

        A MAP without a NAME attribute is ignored and the document is
        flagged as bad HTML.
        """
        if not attrs.has_key('name'):
            self.badhtml = 1
            return
        from ImageMap import MapInfo
        map_name = attrs['name']
        self.current_map = MapInfo(map_name)

    def end_map(self):
        """Handle </MAP>: file the finished map under its name in the context."""
        finished = self.current_map
        if not finished:
            return
        self.context.image_maps[finished.name] = finished
        self.current_map = None

    # New tag: <AREA> (goes inside a map)

    def do_area(self, attrs):
        """Handle the <AREA> tag."""

        if self.current_map:
            extract = extract_keyword
            shape = extract('shape',
                            attrs,
                            'rect',
                            conv=grailutil.conv_normstring)
            if shape == 'polygon':
                shape = 'poly'
                self.badhtml = 1
            coords = extract('coords', attrs, '')
            alt = extract('alt', attrs, '')
            target = extract('target', attrs, '')
            # not sure what the point of NOHREF is
            url = extract('nohref', attrs, extract('href', attrs, ''))

            try:
                self.current_map.add_shape(
                    shape, self.parse_area_coords(shape, coords), url, target)
            except (IndexError, ValueError):
                # wrong number of coordinates
                # how should this get reported to the user?
                self.badhtml = 1
                print "imagemap specifies bad coordinates:", ` coords `
                pass
        else:
            self.badhtml = 1

    def parse_area_coords(self, shape, text):
        """Parse a COORDS string into the coordinate list for `shape'.

        `text' is split on runs of commas and spaces.  The result depends
        on the shape:

        'poly'   -- list of (x, y) tuples; the first point is appended
                    again when the polygon is not already closed
        'rect'   -- [(x, y) upper left, (x, y) lower right]
        'circle' -- [(x, y) center, radius]

        Any other shape yields an empty list.

        Raise ValueError (string.atoi_error) when bad numbers occur.
        Raise IndexError when not enough coordinates are specified.

        """
        import re

        terms = map(string.atoi, re.split('[, ]+', string.strip(text)))
        coords = []

        if shape == 'poly':
            # list of (x,y) tuples
            while len(terms) > 0:
                coords.append((terms[0], terms[1]))
                del terms[:2]
            # Compare against the last point itself.  The slice coords[-1:]
            # used before is a list, which never equals a tuple, so closed
            # polygons were given a duplicate closing point.
            if coords[0] != coords[-1]:
                # make sure the polygon is closed
                coords.append(coords[0])
        elif shape == 'rect':
            # (x,y) tuples for upper left, lower right
            coords.append((terms[0], terms[1]))
            coords.append((terms[2], terms[3]))
        elif shape == 'circle':
            # (x,y) tuple for center, followed by int for radius
            coords.append((terms[0], terms[1]))
            coords.append(terms[2])
        return coords

    # New tag: <APPLET>

    def start_applet(self, attrs):
        """Handle <APPLET> by rewriting its attributes for start_object().

        On a copy of the attributes, NAME becomes CLASSID, and CODE
        becomes CODEBASE when no CODEBASE was given.
        """
        # re-write the attributes to use the <OBJECT> support:
        import copy
        has = attrs.has_key
        object_attrs = copy.copy(attrs)
        if has('name'):
            object_attrs['classid'] = attrs['name']
            del object_attrs['name']
        if has('code') and not has('codebase'):
            object_attrs['codebase'] = attrs['code']
            del object_attrs['code']
        self.start_object(object_attrs, 'applet')

    def end_applet(self):
        """Handle </APPLET> by delegating to end_object()."""
        self.end_object()

    # New tag: <APP> (for Grail 0.2 compatibility)

    def do_app(self, attrs):
        """Handle the Grail 0.2 <APP> tag by loading the named applet.

        CLASS, SRC, WIDTH, HEIGHT and MENU are taken out of `attrs'; what
        remains is passed to the applet as parameters.  Nothing happens
        when no module and class can be derived from CLASS.
        """
        mod, cls, src = self.get_mod_class_src(attrs)
        if not mod or not cls:
            return
        width = extract_attribute('width', attrs, conv=string.atoi, delete=1)
        height = extract_attribute('height', attrs, conv=string.atoi, delete=1)
        menu = extract_attribute('menu', attrs, delete=1)
        mod = mod + ".py"
        import AppletLoader
        loader = AppletLoader.AppletLoader(self,
                                           code=mod,
                                           name=cls,
                                           codebase=src,
                                           width=width,
                                           height=height,
                                           menu=menu,
                                           reload=self.reload1)
        if not loader.feasible():
            loader.close()
            return
        for param_name, param_value in attrs.items():
            loader.set_param(param_name, param_value)
        loader.go_for_it()

    # Subroutines for <APP> tag parsing

    def get_mod_class_src(self, keywords):
        """Return (module, class, src) taken from the CLASS and SRC attributes.

        A dotted CLASS is split at its last '.'; otherwise the same name
        serves as both module and class.  Both attributes are removed
        from `keywords'.
        """
        dotted = extract_attribute('class', keywords, '', delete=1)
        src = extract_attribute('src', keywords, delete=1)
        last_dot = string.rfind(dotted, '.')
        if last_dot < 0:
            return dotted, dotted, src
        return dotted[:last_dot], dotted[last_dot + 1:], src

    # Heading support for dingbats (iconic entities):

    def header_bgn(self, tag, level, attrs):
        """Start a heading, prefixing it with a dingbat or SRC image if given.

        DINGBAT takes precedence over SRC; either one is followed by a
        single space.
        """
        HTMLParser.header_bgn(self, tag, level, attrs)
        dingbat = extract_keyword('dingbat', attrs)
        if dingbat:
            self.unknown_entityref(dingbat, '')
        elif attrs.has_key('src'):
            self.do_img(attrs)
        else:
            return
        self.formatter.add_flowing_data(' ')

    # List attribute extensions:

    def start_ul(self, attrs, tag='ul'):
        """Handle <UL>, resolving a DINGBAT or SRC bullet beforehand.

        DINGBAT takes precedence over SRC.
        """
        bullet_handler = None
        if attrs.has_key('dingbat'):
            bullet_handler = self.list_handle_dingbat
        elif attrs.has_key('src'):
            bullet_handler = self.list_handle_src
        if bullet_handler is not None:
            bullet_handler(attrs)
        HTMLParser.start_ul(self, attrs, tag=tag)

    def do_li(self, attrs):
        """Handle <LI>, resolving a DINGBAT or SRC bullet beforehand.

        The bullet is resolved only outside any list or directly inside
        a <UL>; DINGBAT takes precedence over SRC.
        """
        bullet_handler = None
        if attrs.has_key('dingbat'):
            bullet_handler = self.list_handle_dingbat
        elif attrs.has_key('src'):
            bullet_handler = self.list_handle_src
        if bullet_handler is not None:
            stack = self.list_stack
            if not stack or stack[-1][0] == 'ul':
                bullet_handler(attrs)
        HTMLParser.do_li(self, attrs)

    def list_handle_dingbat(self, attrs):
        """Store the loaded DINGBAT image in attrs['type'], if it loads."""
        dingbat_name = attrs['dingbat']
        if not dingbat_name:
            return
        img = self.load_dingbat(dingbat_name)
        if img:
            attrs['type'] = img

    def list_handle_src(self, attrs):
        """Store the SRC image in attrs['type'] when image loading is on.

        Whitespace is removed from the SRC value before it is requested.
        """
        if self.app.prefs.GetBoolean("browser", "load-images"):
            src = string.joinfields(string.split(attrs['src']), '')
            image = self.context.get_async_image(src, self.reload)
            if image:
                attrs['type'] = image

    # Override make_format():
    # This allows disc/circle/square to be mapped to dingbats.

    def make_format(self, format, default='disc', listtype=None):
        """Return the bullet format, mapping disc/circle/square to dingbats.

        In a 'ul' list those names yield the matching dingbat image when
        it loads; in any other list they yield '1.'.  Everything else is
        left to the base class.
        """
        fmt = format or default
        if type(fmt) is StringType:
            fmt = string.lower(fmt)
        if fmt not in ('disc', 'circle', 'square'):
            return HTMLParser.make_format(self,
                                          format,
                                          default,
                                          listtype=listtype)
        if listtype != 'ul':
            return '1.'
        img = self.load_dingbat(fmt)
        if img:
            return img
        return HTMLParser.make_format(self, format, default,
                                      listtype=listtype)

    def report_unbalanced(self, tag):
        """Flag the document as bad HTML; `tag' itself is not used."""
        self.badhtml = 1

    # Handle proposed iconic entities (see W3C working drafts or HTML 3):

    def unknown_entityref(self, entname, terminator):
        """Render an entity as a dingbat, else defer to the base class.

        load_dingbat() may return a (string, tag) tuple, which is written
        as text in that tag's style, or an image, which is embedded as a
        Label.  Nothing is done while output is suppressed.
        """
        if self.suppress_output:
            return
        img = self.load_dingbat(entname)
        if not img:
            # Could not load dingbat, allow parent class to handle:
            HTMLParser.unknown_entityref(self, entname, terminator)
            return
        if type(img) is TupleType:
            s, tag = img
            if not tag:
                self.handle_data(s)
                return
            if tag != "_ding":
                tag = (self.formatter.writer.fonttag or '') + tag
            self.viewer.configure_fonttag(tag)
            self.formatter.push_style(tag)
            self.viewer.text.tag_raise(tag)
            self.handle_data(s)
            self.formatter.pop_style()
            return
        text_widget = self.viewer.text
        label = Label(text_widget,
                      image=img,
                      background=text_widget['background'],
                      borderwidth=0)
        self.add_subwindow(label)
        # this needs to be done *after* the add_subwindow()
        # call to get the right <Button-3> bindings.
        if self.anchor:
            IconicEntityLinker(self.viewer, self.anchor, self.target, label)

    def entref_nbsp(self, terminator):
        """Render &nbsp; as an invisible 'i'."""
        filler = 'i'
        self.__do_invisible(filler)

    def entref_emsp(self, terminator):
        """Render &emsp; as an invisible 'M'."""
        filler = "M"
        self.__do_invisible(filler)

    def entref_quad(self, terminator):
        """Render &quad; as an invisible 'MMMM'."""
        filler = "MMMM"
        self.__do_invisible(filler)

    def __do_invisible(self, s):
        """Write `s' in the background color so it only takes up space.

        This breaks using the X-Selection for cut & paste somewhat: the
        invisible text does not get translated to space characters, so
        whatever was used gets pasted.
        """
        fmt = self.formatter
        fmt.softspace = 0
        text_widget = self.viewer.text
        background = text_widget["background"]
        text_widget.tag_config("INVISIBLE", foreground=background)
        fmt.push_style("INVISIBLE")
        self.handle_data(s)
        fmt.pop_style()
        fmt.nospace = 1
class GrailHTMLParser(HTMLParser):

    # NOTE(review): this list is not read anywhere in the visible code;
    # presumably the HTMLParser base class consults it to decide which
    # tags are routed to an embedded object -- confirm against the base.
    object_aware_tags = ['param', 'a', 'alias', 'applet', 'script', 'object']

    def __init__(self, viewer, reload=0):
        """Set up a parser that renders into `viewer'.

        viewer -- object the formatter writes to; its `context' attribute
                  supplies the browsing context and, through it, the app.
        reload -- true value when the document is being reloaded.
        """
        global _inited
        self.viewer = viewer
        self.reload = reload
        self.context = self.viewer.context
        self.app = self.context.app
        self.load_dingbat = self.app.load_dingbat
        self.loaded = []
        self.current_map = None
        self.target = None
        self.formatter_stack = []
        fmt = formatter.AbstractFormatter(self.viewer)
        HTMLParser.__init__(self, fmt)
        # Only push after the base class is initialized: set_formatter()
        # reads self.nofill and calls self.set_data_handler().
        self.push_formatter(fmt)
        # One-time module initialization, guarded by the module-level flag.
        if not _inited:
            _inited = 1
            init_module(self.app.prefs)
        # Names already registered through register_id().
        self._ids = {}
        # Hackery so reload status can be reset when all applets are loaded
        import AppletLoader
        self.reload1 = self.reload and AppletLoader.set_reload(self.context)
        if self.reload1:
            self.reload1.attach(self)
        # The 'strict' preference maps to restrict(0), matching do_meta().
        if self.app.prefs.GetBoolean('parsing-html', 'strict'):
            self.sgml_parser.restrict(0)
        # Information from <META ... CONTENT="..."> is collected here.
        # Entries are KEY --> [(NAME, HTTP-EQUIV, CONTENT), ...], where
        # KEY is (NAME or HTTP-EQUIV).
        self._metadata = {}

    def close(self):
        """Finish parsing, detach from the reload tracker, start any refresh.

        A "refresh" value is taken from the first matching <META> entry,
        or failing that from the context's headers; when one is found a
        DynamicReloader is created for it.
        """
        HTMLParser.close(self)
        tracker = self.reload1
        if tracker:
            tracker.detach(self)
        self.reload1 = None
        refresh = None
        meta = self._metadata
        if meta.has_key("refresh"):
            first_entry = meta["refresh"][0]
            _name, _http_equiv, refresh = first_entry
        else:
            if self.context.get_headers().has_key("refresh"):
                refresh = self.context.get_headers()["refresh"]
        if not refresh:
            return
        DynamicReloader(self.context, refresh)

    # manage the formatter stack
    def get_formatter(self):
        """Return the formatter on top of the formatter stack."""
        stack = self.formatter_stack
        return stack[-1]

    def push_formatter(self, formatter):
        """Push `formatter' on the stack and make it the active formatter."""
        stack = self.formatter_stack
        stack.append(formatter)
        self.set_formatter(formatter)

    def pop_formatter(self):
        """Drop the top formatter and reactivate the one beneath it."""
        stack = self.formatter_stack
        del stack[-1]
        previous = stack[-1]
        self.set_formatter(previous)

    def set_formatter(self, formatter):
        """Make `formatter' current and rebind viewer, context and data handler.

        The data handler is the formatter's literal-data method while
        self.nofill is true, and its flowing-data method otherwise.
        """
        self.formatter = formatter      # attribute used by the base class
        writer = formatter.writer
        self.viewer = writer
        self.context = writer.context
        if self.nofill:
            handler = formatter.add_literal_data
        else:
            handler = formatter.add_flowing_data
        self.set_data_handler(handler)

    # Override HTMLParser internal methods

    def get_devicetypes(self):
        """Return the sequence of device type names for this parser."""
        device_types = ('viewer', 'writer')
        return device_types

    def register_id(self, id):
        """Record `id' as a target in the viewer.

        Returns 1 when the id is new.  A duplicate id flags the document
        as bad HTML and returns 0 without touching the viewer.
        """
        seen = self._ids
        if not seen.has_key(id):
            seen[id] = id
            self.viewer.add_target('#' + id)
            return 1
        self.badhtml = 1
        return 0

    def anchor_bgn(self, href, name, type, target="", id=None):
        """Open an anchor: remember href/target and push its four style tags.

        The pushed styles are, in order: the anchor tag ('a', or 'ahist'
        when the URL is in the global history), the URL tag, the '#name'
        tag and the '#id' tag.  Each is None when it does not apply.
        """
        self.anchor = href
        self.target = target
        atag = utag = idtag = nametag = None
        if href:
            if target:
                utag = '>%s%s%s' % (href, Viewer.TARGET_SEPARATOR, target)
            else:
                utag = '>' + href
            self.viewer.bind_anchors(utag)
            history = self.app.global_history
            if history.inhistory_p(self.context.get_baseurl(href)):
                atag = 'ahist'
            else:
                atag = 'a'
        if id and self.register_id(id):
            idtag = '#' + id
        if name and self.register_id(name):
            nametag = '#' + name
        self.formatter.push_style(atag, utag, nametag, idtag)

    def anchor_end(self):
        """Close the anchor: pop the four styles pushed by anchor_bgn()."""
        self.formatter.pop_style(4)
        self.anchor = None
        self.target = None

    def do_hr(self, attrs):
        """Handle <HR>, honoring the SRC, COLOR, NOSHADE and SIZE attributes.

        With a SRC attribute (and image loading enabled on the app) the
        rule is drawn as an aligned image followed by a line break.
        Otherwise the base class handles the tag, and the last entry of
        self.viewer.rules (presumably the rule it just created -- confirm)
        is recolored and/or flattened as requested.
        """
        if attrs.has_key('src') and self.app.load_images:
            align = extract_keyword('align', attrs, default='center',
                    conv=lambda s,gu=grailutil: gu.conv_enumeration(
                        gu.conv_normstring(s), ['left', 'center', 'right']))
            self.implied_end_p()
            self.formatter.push_alignment(align)
            self.do_img({'border': '0', 'src': attrs['src']})
            self.formatter.pop_alignment()
            self.formatter.add_line_break()
            return
        HTMLParser.do_hr(self, attrs)
        color = extract_keyword('color', attrs)
        rules = self.viewer.rules
        if not rules:
            # No rule widget to decorate.  The list used to be indexed
            # before it was checked for emptiness, raising IndexError.
            return
        rule = rules[-1]
        if attrs.has_key('noshade'):
            if color:
                if not self.configcolor('background', color, widget=rule):
                    # Requested color was rejected; use the text color.
                    self.configcolor('background',
                                     self.viewer.text['foreground'],
                                     widget=rule)
            else:
                # this color is known to work already
                rule.config(background=self.viewer.text['foreground'])
            rule.config(relief=FLAT)
            size = extract_keyword('size', attrs, 2,
                                   conv=grailutil.conv_integer)
            if size == 1:
                # could not actually set it to 1 unless it was flat; do it now:
                width = string.atoi(rule.cget('width'))
                rule.config(borderwidth=0, height=1, width=width+2)
        elif color:
            self.configcolor('background', color, widget=rule)

    # Duplicated from htmllib.py because we want to have the border attribute
    def do_img(self, attrs):
        """Handle <IMG>: collect its attributes and call handle_image().

        Duplicated from htmllib so that BORDER is honored.  When no BORDER
        is given, images inside an anchor, ISMAP images and USEMAP images
        get a border of 2; all others get 0.
        """
        extract = extract_keyword
        align = BASELINE
        ## align = extract('align', attrs, align, conv=conv_align)
        alt = extract('alt', attrs, '(image)')
        border = extract('border', attrs, self.anchor and 2 or None,
                         conv=string.atoi)
        ismap = attrs.has_key('ismap')
        if ismap and border is None:
            border = 2
        src = extract('src', attrs, '')
        width = extract('width', attrs, 0, conv=string.atoi)
        height = extract('height', attrs, 0, conv=string.atoi)
        hspace = extract('hspace', attrs, 0, conv=string.atoi)
        vspace = extract('vspace', attrs, 0, conv=string.atoi)
        usemap = extract('usemap', attrs, conv=string.strip)
        if usemap:
            # NOTE(review): the previous code stripped a leading '#' into a
            # local that was never used, so the map name has always been
            # passed on unchanged.  That behavior is kept; confirm whether
            # MapThunk expects the name with or without the '#'.
            from ImageMap import MapThunk
            usemap = MapThunk(self.context, usemap)
            if border is None:
                border = 2
        self.handle_image(src, alt, usemap, ismap,
                          align, width, height, border or 0, self.reload1,
                          hspace=hspace, vspace=vspace)

    def handle_image(self, src, alt, usemap, ismap, align, width,
                     height, border=2, reload=0, hspace=0, vspace=0):
        """Insert an image window, or its ALT text when images are disabled.

        The "browser"/"load-images" preference decides between the two.
        """
        if self.app.prefs.GetBoolean("browser", "load-images"):
            from ImageWindow import ImageWindow
            window = ImageWindow(self.viewer, self.anchor, src,
                                 alt or "(Image)", usemap, ismap, align,
                                 width, height, border, self.target, reload)
            self.add_subwindow(window,
                               align=align, hspace=hspace, vspace=vspace)
        else:
            self.handle_data(alt)

    def add_subwindow(self, w, align=CENTER, hspace=0, vspace=0):
        """Embed widget `w' in the viewer at the current position.

        hspace and vspace are accepted but currently unused; the padding
        code below is disabled.
        """
        fmt = self.formatter
        fmt.flush_softspace()
        if fmt.nospace:
            # XXX Disgusting hack to tag the first character of the line
            # so things like indents and centering work
            self.viewer.prepare_for_insertion()
        self.viewer.add_subwindow(w, align=align)
        ##      if hspace or vspace:
        ##          self.viewer.text.window_config(w, padx=hspace, pady=vspace)
        fmt.assert_line_data()

    # Extend tag: </TITLE>

    def end_title(self):
        """Finish </TITLE>: publish the title to the context.

        A title that ends outside the document head marks the page as
        bad HTML.
        """
        HTMLParser.end_title(self)
        self.context.set_title(self.title)
        if self.inhead:
            return
        self.badhtml = 1

    # Override tag: <BODY colorspecs...>

    def start_body(self, attrs):
        """Handle <BODY>, applying its color attributes when allowed.

        Nothing beyond the base-class handling happens unless the
        "parsing-html"/"honor-colors" preference is set.  BGCOLOR sets the
        background; TEXT, LINK, VLINK and ALINK set foreground colors for
        the default text and the 'a', 'ahist' and 'atemp' tags.
        """
        HTMLParser.start_body(self, attrs)
        if not self.app.prefs.GetBoolean('parsing-html', 'honor-colors'):
            return
        from grailutil import conv_normstring
        background = extract_keyword('bgcolor', attrs, conv=conv_normstring)
        if background:
            used = self.configcolor('background', background)
            if used:
                #  Normally not important, but ISINDEX would cause
                #  these to be non-empty, as would all sorts of illegal stuff:
                for embedded in self.viewer.rules + self.viewer.subwindows:
                    embedded.config(highlightbackground = used)
        # (attribute name, text tag the foreground color applies to)
        foreground_specs = (('text', None),
                            ('link', 'a'),
                            ('vlink', 'ahist'),
                            ('alink', 'atemp'))
        for attrname, tag in foreground_specs:
            value = extract_keyword(attrname, attrs, conv=conv_normstring)
            self.configcolor('foreground', value, tag)

    # Color names defined by the HTML 3.2 (Wilbur) version of HTML, mapped
    # to their "#rrggbb" values.  configcolor() falls back to this table
    # when a color cannot be configured under the name as given.
    _std_colors = {"black": "#000000",
                   "silver": "#c0c0c0",
                   "gray": "#808080",
                   "white": "#ffffff",
                   "maroon": "#800000",
                   "red": "#ff0000",
                   "purple": "#800080",
                   "fuchsia": "#ff00ff",
                   "green": "#008000",
                   "lime": "#00ff00",
                   "olive": "#808000",
                   "yellow": "#ffff00",
                   "navy": "#000080",
                   "blue": "#0000ff",
                   "teal": "#008080",
                   "aqua": "#00ffff",
                   }

    def configcolor(self, option, color, tag=None, widget=None):
        """Set a color option, returning the color that was actually used.

        The color is tried as given, then with a leading '#' added (when
        it has none), and finally through the standard HTML color-name
        table.  `widget' defaults to the viewer's text widget.

        If no color was set, `None' (or the false result of the last
        attempt) is returned.
        """
        if not color:
            return None
        target = widget or self.viewer.text
        used = try_configcolor(option, color, tag, target)
        if used:
            return used
        if color[0] != '#':
            # Maybe a hex value written without the leading '#'.
            used = try_configcolor(option, '#' + color, tag, target)
            if used:
                return used
        if self._std_colors.has_key(color):
            return try_configcolor(option, self._std_colors[color],
                                   tag, target)
        return used

    # Override tag: <BASE HREF=...>

    def do_base(self, attrs):
        """Handle <BASE>: hand HREF and TARGET to the context.

        Either value is None when the attribute is absent.
        """
        self.context.set_baseurl(attrs.get('href', None),
                                 attrs.get('target', None))

    # Override tag: <META ...>

    def do_meta(self, attrs):
        """Handle <META>: record the entry and honor "grail:parse-mode".

        Each entry is stored in self._metadata as a (NAME, HTTP-EQUIV,
        CONTENT) tuple under the key (NAME or HTTP-EQUIV).  A tag lacking
        the required attributes only marks the document as bad HTML.
        """
        has_key = attrs.has_key
        # CONTENT='...' is required;
        # at least one of HTTP-EQUIV=xyz or NAME=xyz is required.
        if not (has_key("content")
                and (has_key("http-equiv") or has_key("name"))):
            self.badhtml = 1
            return
        normalize = grailutil.conv_normstring
        name = extract_keyword("name", attrs, conv=normalize)
        http_equiv = extract_keyword("http-equiv", attrs, conv=normalize)
        key = name or http_equiv
        if not key:
            self.badhtml = 1
            return
        content = extract_keyword("content", attrs, conv=string.strip)
        item = (name, http_equiv, content)
        try:
            self._metadata[key].append(item)
        except KeyError:
            self._metadata[key] = [item]
        if key != "grail:parse-mode":
            return
        # The document asks for a specific parsing mode; switch only when
        # the parser is not already in that mode.
        mode = normalize(content)
        strict = self.sgml_parser.strict_p()
        if mode == "strict" and not strict:
            self.sgml_parser.restrict(0)
            self.context.message("Entered strict parsing mode on"
                                 " document request.")
        elif mode == "forgiving" and strict:
            self.sgml_parser.restrict(1)
            self.context.message("Exited strict parsing mode on"
                                 " document request.")

    # Duplicated from htmllib.py because we want to have the target attribute
    def start_a(self, attrs):
        """Handle <A>, with support for URN, TARGET, ID and TITLE.

        Inside an embedded object the anchor is handed to that object.
        Otherwise the anchor is started, and a TITLE attribute is recorded
        in the global history when the URL has no title there yet.
        """
        if self.get_object():           # expensive!
            self.get_object().anchor(attrs)
            return
        # Prefer a URN of a known scheme; otherwise fall back to HREF.
        href = string.strip(attrs.get("urn", ""))
        scheme, resturl = urllib.splittype(href)
        if scheme == "urn":
            scheme, resturl = urllib.splittype(resturl)
        if scheme not in ("doi", "hdl", "ietf"):
            # this is an unknown URN scheme or there wasn't a URN
            href = string.strip(attrs.get("href", ""))
        name = extract_keyword('name', attrs,
                               conv=grailutil.conv_normstring)
        linktype = ''
        if attrs.has_key('type'):
            linktype = string.lower(attrs['type'] or '')
        target = attrs.get('target', '')
        anchor_id = attrs.get('id', None)
        self.anchor_bgn(href, name, linktype, target, anchor_id)
        # Delay this at least a little, since we don't want to add the title
        # to the history until the last possible moment.  We need a non-history
        # way to do this; a resources database would be much better.
        if not attrs.has_key('title'):
            return
        title = string.join(string.split(attrs['title'] or ''))
        if not title:
            return
        url = self.context.get_baseurl(
            string.joinfields(string.split(href), ''))
        old_title, when = self.app.global_history.lookup_url(url)
        if not old_title:
            # Only do this if there's not already a title in the
            # history.  If the URL wasn't in the history, it will
            # be given a timestamp, which is bad. ;-(
            self.app.global_history.set_title(url, title)

    # New tag: <MAP> (for client side image maps)

    def start_map(self, attrs):
        """Handle <MAP>: begin collecting a client-side image map.

        A map without a NAME is ignored and marks the document as bad
        HTML.
        """
        if not attrs.has_key('name'):
            self.badhtml = 1
            return
        from ImageMap import MapInfo
        self.current_map = MapInfo(attrs['name'])

    def end_map(self):
        """Handle </MAP>: register the finished map under its name."""
        finished = self.current_map
        if not finished:
            return
        self.context.image_maps[finished.name] = finished
        self.current_map = None

    # New tag: <AREA> (goes inside a map)

    def do_area(self, attrs):
        """Handle the <AREA> tag."""

        if self.current_map:
            extract = extract_keyword
            shape = extract('shape', attrs, 'rect',
                            conv=grailutil.conv_normstring)
            if shape == 'polygon':
                shape = 'poly'
                self.badhtml = 1
            coords = extract('coords', attrs, '')
            alt = extract('alt', attrs, '')
            target = extract('target', attrs, '')
            # not sure what the point of NOHREF is
            url = extract('nohref', attrs, extract('href', attrs, ''))

            try:
                self.current_map.add_shape(
                    shape, self.parse_area_coords(shape, coords), url, target)
            except (IndexError, ValueError):
                # wrong number of coordinates
                # how should this get reported to the user?
                self.badhtml = 1
                print "imagemap specifies bad coordinates:", `coords`
                pass
        else:
            self.badhtml = 1

    def parse_area_coords(self, shape, text):
        """Parses coordinate string into list of numbers.

        Coordinates are stored differently depending on the shape of
        the object.

        Raise string.atoi_error when bad numbers occur.
        Raise IndexError when not enough coordinates are specified.
        
        """
        import re

        coords = []

        terms = map(string.atoi, re.split('[, ]+', string.strip(text)))

        if shape == 'poly':
            # list of (x,y) tuples
            while len(terms) > 0:
                coords.append((terms[0], terms[1]))
                del terms[:2]
            if coords[0] != coords[-1:]:
                # make sure the polygon is closed
                coords.append(coords[0])
        elif shape == 'rect':
            # (x,y) tuples for upper left, lower right
            coords.append((terms[0], terms[1]))
            coords.append((terms[2], terms[3]))
        elif shape == 'circle':
            # (x,y) tuple for center, followed by int for radius
            coords.append((terms[0], terms[1]))
            coords.append(terms[2])
        return coords

    # New tag: <APPLET>

    def start_applet(self, attrs):
        """Handle <APPLET> by translating it into an <OBJECT> start tag.

        On a copy of the attributes, NAME is renamed to CLASSID, and CODE
        is renamed to CODEBASE when no CODEBASE was given.
        """
        # re-write the attributes to use the <OBJECT> support:
        import copy
        object_attrs = copy.copy(attrs)
        if attrs.has_key('name'):
            object_attrs['classid'] = attrs['name']
            del object_attrs['name']
        if not attrs.has_key('codebase'):
            if attrs.has_key('code'):
                object_attrs['codebase'] = attrs['code']
                del object_attrs['code']
        self.start_object(object_attrs, 'applet')

    def end_applet(self):
        """Handle </APPLET> by delegating to end_object()."""
        self.end_object()

    # New tag: <APP> (for Grail 0.2 compatibility)

    def do_app(self, attrs):
        """Handle the <APP> tag by loading the applet it names.

        CLASS, SRC, WIDTH, HEIGHT and MENU are consumed from `attrs'; the
        remaining attributes are passed to the applet as parameters.
        Nothing happens when no module/class can be derived from CLASS.
        """
        module, classname, src = self.get_mod_class_src(attrs)
        if not module or not classname:
            return
        width = extract_attribute('width', attrs, conv=string.atoi, delete=1)
        height = extract_attribute('height', attrs, conv=string.atoi, delete=1)
        menu = extract_attribute('menu', attrs, delete=1)
        import AppletLoader
        loader = AppletLoader.AppletLoader(
            self, code=module + ".py", name=classname, codebase=src,
            width=width, height=height, menu=menu,
            reload=self.reload1)
        if not loader.feasible():
            loader.close()
            return
        for param, value in attrs.items():
            loader.set_param(param, value)
        loader.go_for_it()

    # Subroutines for <APP> tag parsing

    def get_mod_class_src(self, keywords):
        """Return (module, class, src) taken from the CLASS and SRC keywords.

        Both keywords are removed from `keywords'.  A dotted CLASS value
        is split at its last dot into module and class; an undotted value
        serves as both.
        """
        cls = extract_attribute('class', keywords, '', delete=1)
        src = extract_attribute('src', keywords, delete=1)
        mod = cls
        if '.' in cls:
            dot = string.rfind(cls, '.')
            mod, cls = cls[:dot], cls[dot+1:]
        return mod, cls, src

    # Heading support for dingbats (iconic entities):

    def header_bgn(self, tag, level, attrs):
        """Start a heading, optionally prefixed by a dingbat or an image.

        A DINGBAT attribute takes precedence over SRC; either one is
        followed by a single space.
        """
        HTMLParser.header_bgn(self, tag, level, attrs)
        dingbat = extract_keyword('dingbat', attrs)
        if dingbat:
            self.unknown_entityref(dingbat, '')
        elif attrs.has_key('src'):
            self.do_img(attrs)
        else:
            return
        # Separate the icon from the heading text.
        self.formatter.add_flowing_data(' ')

    # List attribute extensions:

    def start_ul(self, attrs, tag='ul'):
        """Handle <UL>, honoring the DINGBAT and SRC bullet extensions.

        DINGBAT takes precedence over SRC.
        """
        if attrs.has_key('dingbat'):
            bullet_handler = self.list_handle_dingbat
        elif attrs.has_key('src'):
            bullet_handler = self.list_handle_src
        else:
            bullet_handler = None
        if bullet_handler is not None:
            bullet_handler(attrs)
        HTMLParser.start_ul(self, attrs, tag=tag)

    def do_li(self, attrs):
        """Handle <LI>, honoring the DINGBAT and SRC bullet extensions.

        The extensions apply only outside any list or when the innermost
        list is a 'ul'.  DINGBAT takes precedence over SRC.
        """
        if attrs.has_key('dingbat'):
            bullet_handler = self.list_handle_dingbat
        elif attrs.has_key('src'):
            bullet_handler = self.list_handle_src
        else:
            bullet_handler = None
        if bullet_handler is not None:
            if not self.list_stack or self.list_stack[-1][0] == 'ul':
                bullet_handler(attrs)
        HTMLParser.do_li(self, attrs)

    def list_handle_dingbat(self, attrs):
        """Replace attrs['type'] with the dingbat named by attrs['dingbat'].

        Nothing changes when the name is empty or the dingbat cannot be
        loaded.
        """
        name = attrs['dingbat']
        if not name:
            return
        img = self.load_dingbat(name)
        if img:
            attrs['type'] = img

    def list_handle_src(self, attrs):
        """Replace attrs['type'] with the image named by attrs['src'].

        Does nothing when the "browser"/"load-images" preference is off or
        no image is obtained.  All whitespace is removed from the SRC
        value before it is requested.
        """
        if self.app.prefs.GetBoolean("browser", "load-images"):
            src = string.joinfields(string.split(attrs['src']), '')
            image = self.context.get_async_image(src, self.reload)
            if image:
                attrs['type'] = image

    # Override make_format():
    # This allows disc/circle/square to be mapped to dingbats.

    def make_format(self, format, default='disc', listtype=None):
        """Return the list-item format, mapping bullets to dingbats.

        For 'disc', 'circle' and 'square' (case-insensitive): in a 'ul'
        the matching dingbat is returned, falling back to the base class
        when it cannot be loaded; in any other list type '1.' is returned.
        Every other format is left to the base class.
        """
        fmt = format or default
        if type(fmt) is StringType:
            fmt = string.lower(fmt)
        if fmt not in ('disc', 'circle', 'square'):
            return HTMLParser.make_format(self, format, default,
                                          listtype = listtype)
        if listtype != 'ul':
            return '1.'
        img = self.load_dingbat(fmt)
        return img or HTMLParser.make_format(self, format, default,
                                             listtype = listtype)

    def report_unbalanced(self, tag):
        """Mark the document as bad HTML; `tag' itself is not used."""
        self.badhtml = 1

    # Handle proposed iconic entities (see W3C working drafts or HTML 3):

    def unknown_entityref(self, entname, terminator):
        """Render an otherwise unknown entity as a dingbat if possible.

        load_dingbat() may return a (string, tag) tuple, which is inserted
        as text (styled with the tag when there is one), or an image,
        which is embedded as a Label.  When no dingbat is found the base
        class handles the entity.  Nothing is done while output is
        suppressed.
        """
        if self.suppress_output:
            return
        img = self.load_dingbat(entname)
        if not img:
            # Could not load dingbat, allow parent class to handle:
            HTMLParser.unknown_entityref(self, entname, terminator)
            return
        if type(img) is not TupleType:
            # An image: embed it in a label matching the text background.
            bgcolor = self.viewer.text['background']
            label = Label(self.viewer.text, image=img,
                          background=bgcolor, borderwidth=0)
            self.add_subwindow(label)
            # this needs to be done *after* the add_subwindow()
            # call to get the right <Button-3> bindings.
            if self.anchor:
                IconicEntityLinker(self.viewer, self.anchor,
                                   self.target, label)
            return
        s, tag = img
        if not tag:
            self.handle_data(s)
            return
        if tag != "_ding":
            tag = (self.formatter.writer.fonttag or '') + tag
        self.viewer.configure_fonttag(tag)
        self.formatter.push_style(tag)
        self.viewer.text.tag_raise(tag)
        self.handle_data(s)
        self.formatter.pop_style()

    def entref_nbsp(self, terminator):
        """Render the `nbsp' entity as an invisible 'i'.

        `terminator' is not used.
        """
        self.__do_invisible('i')

    def entref_emsp(self, terminator):
        """Render the `emsp' entity as an invisible "M".

        `terminator' is not used.
        """
        self.__do_invisible("M")

    def entref_quad(self, terminator):
        """Render the `quad' entity as an invisible "MMMM".

        `terminator' is not used.
        """
        self.__do_invisible("MMMM")

    def __do_invisible(self, s):
        """Insert `s' drawn in the background color, so it only takes up space.

        This breaks using the X-Selection for cut & paste somewhat: the
        invisible text does not get translated to space characters, so
        whatever was used gets pasted.
        """
        fmt = self.formatter
        text = self.viewer.text
        fmt.softspace = 0
        # Foreground == background makes the characters invisible.
        text.tag_config("INVISIBLE", foreground=text["background"])
        fmt.push_style("INVISIBLE")
        self.handle_data(s)
        fmt.pop_style()
        fmt.nospace = 1