Beispiel #1
0
    def _defaults(self, routekeys, reserved_keys, kargs):
        """Creates default set with values stringified
        
        Put together our list of defaults, stringify non-None values
        and add in our action/id default if they use it and didn't
        specify it.
        
        defaultkeys is a list of the currently assumed default keys
        routekeys is a list of the keys found in the route path
        reserved_keys is a list of keys that are not
        
        """
        defaults = {}
        # Add in a controller/action default if they don't exist
        if "controller" not in routekeys and "controller" not in kargs and not self.explicit:
            kargs["controller"] = "content"
        if "action" not in routekeys and "action" not in kargs and not self.explicit:
            kargs["action"] = "index"
        defaultkeys = frozenset([key for key in kargs.keys() if key not in reserved_keys])
        for key in defaultkeys:
            if kargs[key] is not None:
                defaults[key] = self.make_unicode(kargs[key])
            else:
                defaults[key] = None
        if "action" in routekeys and not defaults.has_key("action") and not self.explicit:
            defaults["action"] = "index"
        if "id" in routekeys and not defaults.has_key("id") and not self.explicit:
            defaults["id"] = None
        newdefaultkeys = frozenset([key for key in defaults.keys() if key not in reserved_keys])

        return (defaults, newdefaultkeys)
Beispiel #2
0
 def _defaults(self, routekeys, reserved_keys, kargs):
     """Creates default set with values stringified
     
     Put together our list of defaults, stringify non-None values
     and add in our action/id default if they use it and didn't specify it
     
     defaultkeys is a list of the currently assumed default keys
     routekeys is a list of the keys found in the route path
     reserved_keys is a list of keys that are not
     
     """
     defaults = {}
     # Add in a controller/action default if they don't exist
     if 'controller' not in routekeys and 'controller' not in kargs \
        and not self.explicit:
         kargs['controller'] = 'content'
     if 'action' not in routekeys and 'action' not in kargs \
        and not self.explicit:
         kargs['action'] = 'index'
     defaultkeys = frozenset([key for key in kargs.keys() \
                              if key not in reserved_keys])
     for key in defaultkeys:
         if kargs[key] != None:
             defaults[key] = unicode(kargs[key])
         else:
             defaults[key] = None
     if 'action' in routekeys and not defaults.has_key('action') \
        and not self.explicit:
         defaults['action'] = 'index'
     if 'id' in routekeys and not defaults.has_key('id') \
        and not self.explicit:
         defaults['id'] = None
     newdefaultkeys = frozenset([key for key in defaults.keys() \
                                 if key not in reserved_keys])
     return (defaults, newdefaultkeys)
Beispiel #3
0
    def _setup_route(self):
        # Build our routelist, and the keys used in the route
        self.routelist = routelist = self._pathkeys(self.routepath)
        routekeys = frozenset([key['name'] for key in routelist
                               if isinstance(key, dict)])
        self.dotkeys = frozenset([key['name'] for key in routelist
                                  if isinstance(key, dict) and 
                                     key['type'] == '.'])

        if not self.minimization:
            self.make_full_route()
        
        # Build a req list with all the regexp requirements for our args
        self.req_regs = {}
        for key, val in self.reqs.iteritems():
            self.req_regs[key] = re.compile('^' + val + '$')
        # Update our defaults and set new default keys if needed. defaults
        # needs to be saved
        (self.defaults, defaultkeys) = self._defaults(routekeys, 
                                                      self.reserved_keys, 
                                                      self._kargs.copy())
        # Save the maximum keys we could utilize
        self.maxkeys = defaultkeys | routekeys
        
        # Populate our minimum keys, and save a copy of our backward keys for
        # quicker generation later
        (self.minkeys, self.routebackwards) = self._minkeys(routelist[:])
        
        # Populate our hardcoded keys, these are ones that are set and don't 
        # exist in the route
        self.hardcoded = frozenset([key for key in self.maxkeys \
            if key not in routekeys and self.defaults[key] is not None])
        
        # Cache our default keys
        self._default_keys = frozenset(self.defaults.keys())
Beispiel #4
0
 def reorder_steps(self, id_list):
     """
     Reorders steps based on order provided in id_list.

     id_list must be a list whose entries are convertible to int and which
     names every current step exactly once.  The step named first gets
     position 0, the next one position 1, and so on.

     Raises ValueError if id_list is not a list, contains invalid ids, or
     does not contain ids of all corresponding steps.
     """
     if not isinstance(id_list, list):
         raise ValueError("id_list must be a list")

     # Build a real list: on Python 3 map() returns an iterator, which
     # supports neither == nor len().  Ids that cannot be converted are
     # reported as ValueError, as documented.
     try:
         new_order = [int(step_id) for step_id in id_list]
     except (TypeError, ValueError):
         raise ValueError("id_list contains invalid ids")

     steps = list(self.step_set.all())
     # Compare against the order the steps are returned in rather than the
     # key order of a dict, which says nothing about step order.
     # NOTE(review): assumes step_set.all() yields steps in position
     # order -- confirm against the model's default ordering.
     if new_order == [step.id for step in steps]:
         return  # order is the same. nothing to do.

     steps_by_id = dict((step.id, step) for step in steps)
     if (len(new_order) != len(steps_by_id)
             or frozenset(new_order) != frozenset(steps_by_id)):
         raise ValueError("id_list does not match list of current steps")

     # set position of all steps to temp value
     self.step_set.update(position=(models.F('position') + 1) * -1)

     # update positions of steps
     for position, step_id in enumerate(new_order):
         step = steps_by_id[step_id]
         step.position = position
         step.save()
Beispiel #5
0
 def _minkeys(self, routelist):
     """Utility function to walk the route backwards
     
     Will also determine the minimum keys we can handle to generate
     a working route.
     
     routelist is a list of the '/' split route path
     defaults is a dict of all the defaults provided for the route
     
     """
     minkeys = []
     backcheck = routelist[:]
     
     # If we don't honor minimization, we need all the keys in the
     # route path
     if not self.minimization:
         for part in backcheck:
             if isinstance(part, dict):
                 minkeys.append(part['name'])
         return (frozenset(minkeys), backcheck)
     
     gaps = False
     backcheck.reverse()
     for part in backcheck:
         if not isinstance(part, dict) and part not in self.done_chars:
             gaps = True
             continue
         elif not isinstance(part, dict):
             continue
         key = part['name']
         if self.defaults.has_key(key) and not gaps:
             continue
         minkeys.append(key)
         gaps = True
     return  (frozenset(minkeys), backcheck)
Beispiel #6
0
 def _get_permissions(self):
     try:
         return self._permissions
     except AttributeError:
         # Permissions haven't been computed yet
         pass
     if not self.user:
         self._permissions= frozenset()
     else:
         self._permissions= frozenset([p.permission_name for p in self.user.permissions])
     return self._permissions
Beispiel #7
0
 def _get_groups(self):
     try:
         return self._groups
     except AttributeError:
         # Groups haven't been computed yet
         pass
     if not self.user:
         self._groups= frozenset()
     else:
         self._groups= frozenset([g.group_name for g in self.user.groups])
     return self._groups
Beispiel #8
0
 def _get_group_ids(self):
     """Get set of group IDs of this identity."""
     try:
         return self._group_ids
     except AttributeError:
         # Groups haven't been computed yet
         pass
     if not self.user:
         self._group_ids = frozenset()
     else:
         self._group_ids = frozenset([g.id for g in self.user.groups])
     return self._group_ids
Beispiel #9
0
 def _get_permissions(self):
     """Get set of permission names of this identity."""
     try:
         return self._permissions
     except AttributeError:
         # Permissions haven't been computed yet
         pass
     if not self.user:
         self._permissions = frozenset()
     else:
         self._permissions = frozenset(
             [p.permission_name for p in self.user.permissions])
     return self._permissions
Beispiel #10
0
 def _get_group_ids(self):
     '''Get set of group IDs of this identity.'''
     try:
         return self._group_ids
     except AttributeError: # pylint: disable-msg=W0704
         # :W0704: Groups haven't been computed yet
         pass
     if not self.groups:
         self._group_ids = frozenset()
     else:
         self._group_ids = frozenset([g.id for g in
             self._user.approved_memberships])
     return self._group_ids
Beispiel #11
0
 def _get_permissions(self):
     try:
         return self._permissions
     except AttributeError:
         # Permissions haven't been computed yet
         pass
     if not self.user:
         self._permissions= frozenset()
     else:
         box = hub.getConnection()
         box.start( isolation = dejavu.storage.isolation.READ_COMMITTED )
         self._permissions = frozenset( [ p.permission_name for p in self.user.permissions ] )
         box.flush_all()
     return self._permissions
Beispiel #12
0
 def _get_groups(self):
     try:
         return self._groups
     except AttributeError:
         # Groups haven't been computed yet
         pass
     if not self.user:
         self._groups= frozenset()
     else:
         box = hub.getConnection()
         box.start( isolation = dejavu.storage.isolation.READ_COMMITTED )
         self._groups = frozenset( [ g.group_name for g in self.user.groups ] )
         box.flush_all()
     return self._groups
Beispiel #13
0
 def _get_group_ids(self):
     '''Get set of group IDs of this identity.'''
     try:
         return self._group_ids
     except AttributeError:
         # Groups haven't been computed yet
         pass
     if not self.user:
         self._group_ids = frozenset()
     else:
         ### TG: Difference.  Our model has a many::many for people:groups
         # And an association proxy that links them together
         self._group_ids = frozenset([g.id for g in self.user.approved_memberships])
     return self._group_ids
Beispiel #14
0
def get_wa_runners(lines=None):
    '''Parses predefined file for information about available Wak / Waf runners.

    We are looking into file named wa_runners.conf. If one is not found, or none
    of the entries refer to existing paths, we scan parent folder (.../project/tests/../)
    for filenames matching the following set ("wak.py", "waf-light", "waf")

    Each config line has the form "label = path". Blank lines, lines starting
    with "#" and lines without "=" are skipped.

    @param lines: (Default: None) A list of strings representing the contents of
        the config file. If list is nonempty, we bypass reading the file
        and parse the lines instead.

    @return: A tuple of form: (
            list of runner labels (often the file name), sorted alphabetically,
            dictionary that maps the label to absolute path
            )

    @raise Exception: if neither the config nor the parent folder yields a runner.
    '''
    print("\nDetecting Wak / Waf runners...")
    values = {}
    if not lines:
        try:
            # the with-block closes the file even if reading fails
            with open('wa_runners.conf') as conf:
                lines = conf.readlines()
        except (IOError, OSError):
            # no readable config file: fall through to guessing below
            lines = []
    for line in lines:
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        if "=" not in entry:
            # previously a line like this aborted the whole scan with ValueError
            print("Skipping malformed line (expected 'label = path'): %s" % entry)
            continue
        k, v = entry.split("=", 1)
        k = k.strip()
        v = os.path.abspath(os.path.expanduser(v.strip()))

        if os.path.exists(v):
            print("Detected %s at %s" % (k, v))
            values[k] = v
        else:
            print("Skipping %s. Path not found: %s" % (k, v))
    if not values:
        # you had your chance to specify the runners by hand, it seems we are forced to guess...

        # we are supposed to be in %PROJECTFOLDER%/tests. wak.py or waf are likely in %PROJECT FOLDER%
        candidates = frozenset(('wak.py', 'waf-light', 'waf'))
        matches = candidates.intersection(os.listdir("../"))
        if not matches:
            raise Exception("No Wak / Waf runners found. Please, insure that wa_runners.conf file has entries proper for your project.")
        for match in matches:
            values[match] = os.path.abspath(
                os.path.normpath(os.path.join('../', match)))

    print("%s Wak / Waf runners found.\n" % len(values))
    return sorted(values), values
Beispiel #15
0
    def generate(self, _ignore_req_list=False, _append_slash=False, **kargs):
        """Build a URL for this route from the given keyword arguments.

        Returns False when an argument fails one of the route's regexp
        requirements, when ``method`` is not accepted by the route's
        conditions, or when the underlying generator returns False.
        Keyword arguments that are not route keys become the query
        string, in the order they were passed.

        """
        # Every argument that has a requirement must match it
        if not _ignore_req_list:
            for key in self.reqs.keys():
                candidate = kargs.get(key)
                if not candidate:
                    continue
                if not self.req_regs[key].match(self.make_unicode(candidate)):
                    return False

        # A method argument has to be in the accepted method list, and it
        # never takes part in route generation itself
        meth = as_unicode(kargs.get('method'), self.encoding)
        if meth:
            conditions = self.conditions
            if conditions and 'method' in conditions \
                    and meth.upper() not in conditions['method']:
                return False
            kargs.pop('method')

        if self.minimization:
            url = self.generate_minimized(kargs)
        else:
            url = self.generate_non_minimized(kargs)
        if url is False:
            return url

        if not (url.startswith('/') or self.static):
            url = '/' + url
        extras = frozenset(kargs.keys()) - self.maxkeys

        # The trailing slash, if requested, goes in before any query string
        if _append_slash and not url.endswith('/'):
            url += '/'

        if extras:
            fragments = []
            # 'extras' is a set and has no order: walk the ordered kargs
            for key in kargs:
                if key not in extras or key in ('action', 'controller'):
                    continue
                val = kargs[key]
                if isinstance(val, (tuple, list)):
                    values = val
                else:
                    values = [val]
                for value in values:
                    value = as_unicode(value, self.encoding)
                    fragments.append(
                        (key, _str_encode(value, self.encoding)))
            if fragments:
                url += '?' + urlparse.urlencode(fragments)
        return url
Beispiel #16
0
 def get_dfa_state(states):
     """Return the DFA state standing for the given set of NFA states.

     The epsilon closure of ``states`` is looked up in ``set_to_state``;
     a state created earlier is returned as is.  Otherwise a new state is
     added to ``fda``, remembered in ``set_to_state`` and pushed on
     ``stack`` together with its NFA states.

     The new state is final if any of its NFA states is final.  Its name
     joins the names of all its NFA states, unless one of them is
     unmergeable: then that state's name is used, the one listed earliest
     in ``name_precedence`` if a precedence list is given, else the last
     unmergeable one encountered.
     """
     states = self.epsilon_closure(states)
     frozenstates = frozenset(states)
     if frozenstates in set_to_state:
         return set_to_state[frozenstates]   # already created this state
     if states == self.start_states:
         assert not set_to_state
     # any() rather than bool(filter(...)): on Python 3 filter() returns
     # an iterator object, which is always true
     final = any([state in self.final_states for state in states])
     name = ", ".join([self.names[state] for state in states])
     if name_precedence is not None:
         name_index = len(name_precedence)
     unmergeable = False
     for state in states:
         if state in self.unmergeable_states:
             new_name = self.names[state]
             if name_precedence is not None:
                 try:
                     index = name_precedence.index(new_name)
                 except ValueError:
                     # names without a precedence rank last
                     index = name_index
                 if index < name_index:
                     name_index = index
                     name = new_name
             else:
                 name = new_name
             unmergeable = True
     result = set_to_state[frozenstates] = fda.add_state(
         name, final, unmergeable)
     stack.append((result, states))
     return result
Beispiel #17
0
    def _mergePage(self, page2, page2transformation = None):
        """Merge the resources and content of ``page2`` into this page.

        Resources of both pages are combined per resource type; the
        renames reported by the merge are applied to page2's content
        stream.  If ``page2transformation`` is given it is called with
        page2's content first and its result is used instead.  Both
        content streams are passed through ``_pushPopGS`` before being
        collected, and this page's '/Contents' and '/Resources' entries
        are replaced with the merged values.
        """
        ownResources = self['/Resources'].getObject()
        otherResources = page2['/Resources'].getObject()

        merged = DictionaryObject()
        renames = {}
        resource_kinds = ('/ExtGState', '/Font', '/XObject', '/ColorSpace',
                          '/Pattern', '/Shading', '/Properties')
        for kind in resource_kinds:
            combined, kind_renames = PageObject._mergeResources(
                ownResources, otherResources, kind)
            if not combined:
                continue
            merged[NameObject(kind)] = combined
            renames.update(kind_renames)

        # The procedure sets are simply united
        own_procset = frozenset(
            ownResources.get('/ProcSet', ArrayObject()).getObject())
        other_procset = frozenset(
            otherResources.get('/ProcSet', ArrayObject()).getObject())
        merged[NameObject('/ProcSet')] = ArrayObject(
            own_procset.union(other_procset))

        contents = ArrayObject()
        own = self.getContents()
        if own is not None:
            contents.append(PageObject._pushPopGS(own, self.pdf))
        other = page2.getContents()
        if other is not None:
            if page2transformation is not None:
                other = page2transformation(other)
            other = PageObject._contentStreamRename(other, renames, self.pdf)
            contents.append(PageObject._pushPopGS(other, self.pdf))

        self[NameObject('/Contents')] = ContentStream(contents, self.pdf)
        self[NameObject('/Resources')] = merged
Beispiel #18
0
    def __init__(self, visit_key, user=None, username=None, password=None):
        '''Set up the identity for one visit and contact the server.

        When ``user`` is given it is stored as ``_user`` and its approved
        memberships provide ``_groups``.  The visit key doubles as the
        session id handed to the base class, is written to the response
        cookie named by ``cookie_name``, and an authenticated request is
        sent at the end.
        '''
        if user:
            self._user = user
            memberships = user['approved_memberships']
            self._groups = frozenset(g['name'] for g in memberships)
        self.visit_key = visit_key
        # Use the user's tg_visit key as the session id before requesting
        # authentication, so that the two are linked together; a missing
        # key means no session id.
        session_id = visit_key or None

        debug = config.get('jsonfas.debug', False)
        super(JsonFasIdentity, self).__init__(
            self.fas_url,
            useragent=self.useragent,
            debug=debug,
            username=username,
            password=password,
            session_id=session_id,
            cache_session=self.cache_session)

        if self.debug:
            import inspect
            frames = inspect.getouterframes(inspect.currentframe())
            caller = frames[1][3]
            log.debug('JsonFasIdentity.__init__ caller: %s' % caller)

        response.simple_cookie[self.cookie_name] = visit_key

        # Send a request so that we associate the visit_cookie with the user
        self.send_request('', auth=True)
        log.debug('Leaving JsonFasIdentity.__init__')
Beispiel #19
0
    def _get_user(self):
        '''Get user instance for this identity.

        Returns ``self._user``.  Without a visit key the user is set to
        None.  With a visit key, and unless both username and password are
        set, the request must carry a ``_csrf_token`` parameter equal to
        the hex digest of ``hash_constructor(self.visit_key)``; otherwise
        the user is set to None and the failure reason 'bad_csrf' is
        recorded on the request.  If no ``_user`` attribute exists yet it
        is loaded with ``__retrieve_user()`` and ``_groups`` is filled
        from the user's approved memberships.
        '''
        visit = self.visit_key
        if not visit:
            # No visit, no user
            self._user = None
        else:
            if not (self.username and self.password):
                # Unless we were given the user_name and password to login on
                # this request, a CSRF token is required
                if (not '_csrf_token' in cherrypy.request.params or
                        cherrypy.request.params['_csrf_token'] !=
                        hash_constructor(self.visit_key).hexdigest()):
                    self.log.info("Bad _csrf_token")
                    if '_csrf_token' in cherrypy.request.params:
                        self.log.info("visit: %s token: %s" % (self.visit_key,
                            cherrypy.request.params['_csrf_token']))
                    else:
                        self.log.info('No _csrf_token present')
                    cherrypy.request.fas_identity_failure_reason = 'bad_csrf'
                    self._user = None

        # NOTE: despite the dedented comment line below, the try block is
        # still part of the ``else`` branch above.
        # pylint: disable-msg=W0704
            try:
                # An already set _user (including None from the CSRF check
                # above) is returned right away; _groups is not touched.
                return self._user
            except AttributeError:
                # User hasn't already been set
                # Attempt to load the user. After this code executes, there
                # *will* be a _user attribute, even if the value is None.
                self._user = self.__retrieve_user()
            # Only reached when the user was freshly retrieved.
            # NOTE(review): if __retrieve_user() can return None, the
            # attribute access below raises AttributeError -- confirm.
            self._groups = frozenset(
                    [g['name'] for g in self._user.approved_memberships]
                    )
        # pylint: enable-msg=W0704
        return self._user
Beispiel #20
0
 def generateImpliedEndTags(self, exclude=None):
     """Pop open elements whose end tag is implied.

     Elements are removed from the end of ``self.openElements`` for as
     long as the last one is a dd, dt, li, option, optgroup, p, rp or rt
     element whose name differs from ``exclude``.
     """
     # XXX td, th and tr are not actually needed
     implied = frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
     # XXX This is not entirely what the specification says. We should
     # investigate it more closely.
     while True:
         name = self.openElements[-1].name
         if name not in implied or name == exclude:
             break
         self.openElements.pop()
Beispiel #21
0
 def append(self, left, right, expression):
     """Record a join of ``left`` and ``right`` on ``expression``.

     Raises ValueError if the two sources are already joined, or if the
     fields of the expression do not belong to exactly these two sources.
     """
     if self.is_joined(left, right):
         raise ValueError('Already joined')
     sources = frozenset(field.source for field in iter_fields(expression))
     if not (len(sources) == 2 and left in sources and right in sources):
         raise ValueError('Only expressions of fields of joining sources are allowed')
     self.lefts.append(left)
     self.rights.append(right)
     self.expressions.append(expression)
Beispiel #22
0
    def closeTagOpenState(self):
        """Handle the input that follows a consumed "</".

        In the RCDATA and CDATA content models the upcoming characters are
        first compared (case-insensitively) with the name of the current
        token.  If they do not match, "</" is queued as character data and
        the tokenizer goes back to the data state.  Otherwise, and in the
        other content models, the next character decides whether an end
        tag token is started, a parse error is queued, or the bogus
        comment state is entered.

        Always returns True.
        """
        if (self.contentModelFlag in (contentModelFlags["RCDATA"],
            contentModelFlags["CDATA"])):
            if self.currentToken:
                charStack = []

                # So far we know that "</" has been consumed. We now need to know
                # whether the next few characters match the name of last emitted
                # start tag which also happens to be the currentToken. We also need
                # to have the character directly after the characters that could
                # match the start tag name.
                for x in xrange(len(self.currentToken["name"]) + 1):
                    charStack.append(self.stream.char())
                    # Make sure we don't get hit by EOF
                    if charStack[-1] == EOF:
                        break

                # Since this is just for checking. We put the characters back on
                # the stack.
                self.stream.unget(charStack)

            # charStack only exists when there is a current token; the first
            # operand below short-circuits before it is read otherwise.
            if self.currentToken \
              and self.currentToken["name"].lower() == "".join(charStack[:-1]).lower() \
              and charStack[-1] in (spaceCharacters |
              frozenset((u">", u"/", u"<", EOF))):
                # Because the characters are correct we can safely switch to
                # PCDATA mode now. This also means we don't have to do it when
                # emitting the end tag token.
                self.contentModelFlag = contentModelFlags["PCDATA"]
            else:
                self.tokenQueue.append({"type": "Characters", "data": u"</"})
                self.state = self.states["data"]

                # Need to return here since we don't want the rest of the
                # method to be walked through.
                return True

        # Decide what to do based on the next character of the stream
        data = self.stream.char()
        if data in asciiLetters:
            # Start of an end tag name
            self.currentToken = {"type":"EndTag", "name":data, "data":[]}
            self.state = self.states["tagName"]
        elif data == u">":
            # "</>": report the error and continue in the data state
            self.tokenQueue.append({"type": "ParseError", "data":
              "expected-closing-tag-but-got-right-bracket"})
            self.state = self.states["data"]
        elif data == EOF:
            # Input ended after "</": report it and emit "</" as text
            self.tokenQueue.append({"type": "ParseError", "data":
              "expected-closing-tag-but-got-eof"})
            self.tokenQueue.append({"type": "Characters", "data": u"</"})
            self.state = self.states["data"]
        else:
            # XXX data can be _'_...
            # Any other character: report it, push it back and continue in
            # the bogus comment state
            self.tokenQueue.append({"type": "ParseError", "data":
              "expected-closing-tag-but-got-char",
              "datavars": {"data": data}})
            self.stream.unget(data)
            self.state = self.states["bogusComment"]
        return True
Beispiel #23
0
    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as ``<style>`` or ``<script>`` in HTML
        documents).
        """
        if preserve is None:
            preserve = []
        self.preserve = frozenset(preserve)
        if noescape is None:
            noescape = []
        self.noescape = frozenset(noescape)
Beispiel #24
0
 def _get_groups(self):
     '''Return the groups that a user is a member of.'''
     try:
         return self._groups
     except AttributeError:
         # User and groups haven't been returned.  Since the json call
         # returns both user and groups, this is set at user creation time.
         self._groups = frozenset()
     return self._groups
Beispiel #25
0
 def checkBrowsingContext(self, token, tagName, attrName, attrValue):
     """Yield a parse error for an invalid browsing context keyword.

     Only non-empty values starting with '_' are checked; the match
     against _self, _parent, _top and _blank ignores case.
     """
     if attrValue and attrValue[0] == '_':
         keywords = frozenset(('_self', '_parent', '_top', '_blank'))
         if attrValue.lower() not in keywords:
             yield {"type": tokenTypes["ParseError"],
                    "data": "invalid-browsing-context",
                    "datavars": {"tagName": tagName,
                                 "attributeName": attrName}}
Beispiel #26
0
def freezedicts(obj):
    """Recursively iterate over ``obj``, supporting dicts, tuples
    and lists, and freeze ``dicts`` such that ``obj`` can be used
    with hash().

    A dict becomes a frozenset of ``(key, frozen value)`` pairs; the
    values are frozen recursively, so dicts nested inside dicts are
    handled too.  Lists and tuples keep their type and have their items
    frozen.  Anything else is returned unchanged.
    """
    if isinstance(obj, (list, tuple)):
        return type(obj)([freezedicts(sub) for sub in obj])
    if isinstance(obj, dict):
        # Freeze the values as well: a nested dict left as it is would
        # make the frozenset constructor fail with "unhashable type"
        return frozenset(
            [(key, freezedicts(value)) for key, value in obj.items()])
    return obj
Beispiel #27
0
 def checkStartTagUnknownAttributes(self, token):
     """Yield a parse error for each attribute the tag is not known to have.

     The allowed names are the global attributes plus those registered
     for the lower-cased tag name; attribute names are lower-cased before
     the comparison.
     """
     name = token.get("name").lower()
     tagSpecific = allowedAttributeMap.get(name, frozenset(()))
     allowedAttributes = globalAttributes | tagSpecific
     for attrName, attrValue in token.get("data", []):
         if attrName.lower() in allowedAttributes:
             continue
         yield {"type": tokenTypes["ParseError"],
                "data": "unknown-attribute",
                "datavars": {"tagName": name,
                             "attributeName": attrName}}
Beispiel #28
0
    def pkey(self, cl, newpkey=None):
        """Get or set the primary key of a class.

        If newpkey is a dictionary, the cached mapping of primary keys is
        replaced with a copy of it (class names without a schema get the
        'public.' prefix) and that mapping is returned.  Any other true
        value of newpkey is stored as the primary key of the class and
        returned.

        Otherwise the primary key of the class is looked up and returned.
        Composite primary keys are represented as frozensets.  Note that
        this raises an exception if the table does not have a primary key.

        """
        if isinstance(newpkey, dict):
            # Adopt a copy, making sure that all classes have a namespace
            qualified = {}
            for name, key in newpkey.iteritems():
                if '.' not in name:
                    name = 'public.' + name
                qualified[name] = key
            self._pkeys = qualified
            return self._pkeys

        qcl = self._add_schema(cl)  # fully qualified class name
        if newpkey:
            # The caller supplies a new primary key for the class
            self._pkeys[qcl] = newpkey
            return newpkey

        if qcl not in self._pkeys:
            # Not cached: reload all primary keys at once, in case the
            # table was added after the cache had been filled
            self._pkeys = {}
            if self.server_version < 80200:
                any_indkey = "IN (%s)" % ', '.join(
                    ['pg_index.indkey[%d]' % i for i in range(16)])
            else:
                # the ANY syntax works correctly only with PostgreSQL >= 8.2
                any_indkey = "= ANY (pg_index.indkey)"
            query = (
                "SELECT pg_namespace.nspname, pg_class.relname,"
                    " pg_attribute.attname FROM pg_class"
                " JOIN pg_namespace"
                    " ON pg_namespace.oid = pg_class.relnamespace"
                    " AND pg_namespace.nspname NOT LIKE 'pg_%'"
                " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid"
                    " AND pg_attribute.attisdropped = 'f'"
                " JOIN pg_index ON pg_index.indrelid = pg_class.oid"
                    " AND pg_index.indisprimary = 't'"
                    " AND pg_attribute.attnum " + any_indkey)
            for row in self.db.query(query).getresult():
                name, column = _join_parts(row[:2]), row[2]
                self._pkeys.setdefault(name, []).append(column)
            # (only) composite primary keys are turned into frozensets
            for name, columns in self._pkeys.iteritems():
                if len(columns) > 1:
                    self._pkeys[name] = frozenset(columns)
                else:
                    self._pkeys[name] = columns[0]
            self._do_debug(self._pkeys)

        # will raise an exception if primary key doesn't exist
        return self._pkeys[qcl]
Beispiel #29
0
 def checkBoolean(self, token, tagName, attrName, attrValue):
     """Yield parse errors for an invalid boolean attribute value.

     The only valid values are the empty string and the attribute's own
     name.  Any other value produces an "invalid-boolean-value" error
     followed by an "invalid-attribute-value" error.
     """
     enumeratedValues = frozenset((attrName, ''))
     if attrValue in enumeratedValues:
         return
     location = {"tagName": tagName,
                 "attributeName": attrName}
     yield {"type": tokenTypes["ParseError"],
            "data": "invalid-boolean-value",
            "datavars": dict(location,
                             enumeratedValues=tuple(enumeratedValues))}
     yield {"type": tokenTypes["ParseError"],
            "data": "invalid-attribute-value",
            "datavars": dict(location)}
Beispiel #30
0
 def _get_groups(self):
     '''Return the groups that a user is a member of.'''
     try:
         return self._groups
     except AttributeError: # pylint: disable-msg=W0704
         # :W0704: Groups haven't been computed yet
         pass
     if not self.user:
         # User and groups haven't been returned.  Since the json call
         # computes both user and groups, this will now be set.
         self._groups = frozenset()
     return self._groups
Beispiel #31
0
    "expected-eof-but-got-end-tag":
    _(u"Unexpected end tag (%(name)s)"
      u". Expected end of file."),
    "eof-in-table":
    _(u"Unexpected end of file. Expected table content."),
    "eof-in-select":
    _(u"Unexpected end of file. Expected select content."),
    "eof-in-frameset":
    _(u"Unexpected end of file. Expected frameset content."),
    "XXX-undefined-error":
    (u"Undefined error (this sucks and should be fixed)"),
}

# Numeric codes of the content models
contentModelFlags = {
    "PCDATA": 0,
    "RCDATA": 1,
    "CDATA": 2,
    "PLAINTEXT": 3,
}

# Tag names grouped under "scoping"
scopingElements = frozenset((
    "applet",
    "button",
    "caption",
    "html",
    "marquee",
    "object",
    "table",
    "td",
    "th",
))

# Tag names grouped under "formatting"
formattingElements = frozenset((
    "a",
    "b",
    "big",
    "em",
    "font",
    "i",
    "nobr",
    "s",
    "small",
    "strike",
    "strong",
    "tt",
    "u",
))

specialElements = frozenset((
    "address",
    "area",
    "article",
    "aside",
    "base",
    "basefont",
    "bgsound",
    "blockquote",
    "body",
    "br",
Beispiel #32
0
# FIXME: this should all be confirmed against what a DTD says
# (probably in a test; this may not match the DTD exactly, but we
# should document just how it differs).

# Data taken from http://www.w3.org/TR/html401/index/elements.html
# and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
# for html5_tags.

# Fall back to sets.Set where the frozenset builtin does not exist
try:
    frozenset
except NameError:
    from sets import Set as frozenset

empty_tags = frozenset([
    'area',
    'base',
    'basefont',
    'br',
    'col',
    'frame',
    'hr',
    'img',
    'input',
    'isindex',
    'link',
    'meta',
    'param',
])

deprecated_tags = frozenset([
    'applet',
    'basefont',
    'center',
    'dir',
    'font',
    'isindex',
    'menu',
    's',
    'strike',
    'u',
])

# archive actually takes a space-separated list of URIs
link_attrs = frozenset([
    'action',
    'archive',
    'background',
    'cite',
    'classid',
    'codebase',
Beispiel #33
0
class InequalityIndex(SeededIndex):
    """Index of cases keyed by the value ranges of their criteria.

    Each registered criterion exposes ``ranges``, an iterable of
    ``(lo, hi)`` pairs.  ``casemap_for`` turns a collection of cases into
    a mapping from ``(lo, hi)`` keys to the cases applicable there.  The
    most recent ``casemap_for`` result is cached in ``last_cases`` /
    ``last_out``.
    """

    __slots__ = ('last_cases', 'last_out')

    dispatch_function = staticmethod(dispatch_by_inequalities)

    def __init__(self):
        """Create an empty index (delegates to `clear`)."""
        self.clear()

    def count_for(self, cases):
        """Get the total count of outgoing branches, given incoming cases

        Returns a 2-tuple: the number of entries in the case map for
        `cases`, and the sum of the lengths of all its values.
        """
        casemap = self.casemap_for(cases)
        return len(casemap), sum([len(x) for x in casemap.values()])

    def clear(self):
        """Reset index to empty, dropping the cached case map as well"""
        self.allSeeds = {}  # range endpoints seen so far (dict used as a set)
        self.criteria = {}  # case -> criterion, as stored by __setitem__
        self.last_cases = None
        self.last_out = None

    def __setitem__(self, criterion, case):
        """Record `criterion` for `case` and note its range endpoints

        Both endpoints of every ``(lo, hi)`` pair in ``criterion.ranges``
        are added to ``allSeeds``.  The cached case map is not reset here.
        """
        self.criteria[case] = criterion
        for (lo, hi) in criterion.ranges:
            self.allSeeds[lo] = self.allSeeds[hi] = None

    def addSeed(self, seed):
        """Not supported by this index: always raises NotImplementedError"""
        raise NotImplementedError

    def casemap_for(self, cases):
        """Return a mapping from seeds->caselists for the given cases

        Keys of the result are ``(lo, hi)`` tuples; a key with
        ``lo == hi`` stands for a single value.  Cases that have no
        registered criterion are treated as covering ``(Min, Max)``.
        The result is cached and returned as-is when called again with
        the same (identical or equal) `cases`.
        """
        # One-entry cache: reuse the previous answer for the same input.
        if cases is self.last_cases or cases == self.last_cases:
            return self.last_out
        # tmp maps each boundary value to three lists of cases:
        #   [0] cases whose range starts at the value ("add"),
        #   [1] cases whose range ends at the value ("remove"),
        #   [2] cases matching exactly that value ("eq", lo == hi).
        tmp = {}
        out = {}
        get = self.criteria.get
        # Fallback criterion for unregistered cases: the full range.
        all = Inequality('..', [(Min, Max)])
        have_ineq = False
        for case in cases:
            for (lo, hi) in get(case, all).ranges:
                if lo not in tmp:
                    tmp[lo] = [], [], []
                if lo == hi:
                    tmp[lo][2].append(case)
                else:
                    have_ineq = True
                    if hi not in tmp:
                        tmp[hi] = [], [], []
                    tmp[lo][0].append(case)
                    # A range reaching Max never ends, so nothing to remove.
                    if hi is not Max: tmp[hi][1].append(case)
        if have_ineq:
            # Sweep the boundaries in ascending order, keeping in `current`
            # the cases whose ranges are open at the sweep position.
            keys = list(tmp.keys())
            keys.sort()
            current = frozenset(tmp.get(Min, [[]])[0])
            hi = Min
            for val in keys:
                add, remove, eq = tmp[val]
                lo, hi = hi, val
                # Interval between the previous boundary and this one.
                out[lo, hi] = current
                current = current.difference(remove)
                # The boundary value itself: still-open ranges plus exact
                # matches; ranges starting here are added only afterwards.
                out[val, val] = current.union(eq)
                current = current.union(add)
        else:
            # Only exact-value criteria: everything else maps to no cases.
            out[Min, Max] = []  # default
            for val, (add, remove, eq) in list(tmp.items()):
                out[val, val] = eq
        self.last_out = out
        self.last_cases = cases
        return out
Beispiel #34
0
class HTMLSerializer(XHTMLSerializer):
    """Serializer that renders an event stream as HTML text.
    
    >>> from libs.genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written out verbatim (not escaped),
    # listed both unqualified and with the XHTML namespace.
    _NOESCAPE_ELEMS = frozenset((
        QName('script'),
        QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'),
        QName('http://www.w3.org/1999/xhtml}style'),
    ))

    def __init__(self, doctype=None, strip_whitespace=True):
        """Set up the HTML serializer and its filter chain.
        
        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        """
        # The parent is initialised with its second argument forced to
        # False; the filter chain is then rebuilt from scratch below.
        super(HTMLSerializer, self).__init__(doctype, False)
        self.filters = pipeline = [EmptyTagFilter()]
        if strip_whitespace:
            whitespace = WhitespaceFilter(self._PRESERVE_SPACE,
                                          self._NOESCAPE_ELEMS)
            pipeline.append(whitespace)
        pipeline.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))

    def __call__(self, stream):
        """Yield the HTML output fragments for the events in `stream`.

        The preamble is emitted first and every configured filter is
        applied before serialization.  Only the first DOCTYPE event is
        written.
        """
        boolean_names = self._BOOLEAN_ATTRS
        void_elems = self._EMPTY_ELEMS
        verbatim_elems = self._NOESCAPE_ELEMS
        seen_doctype = False
        verbatim = False

        events = chain(self.preamble, stream)
        for apply_filter in self.filters:
            events = apply_filter(events)

        for kind, data, _pos in events:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                pieces = ['<', tag]
                for attr, value in attrib:
                    if attr not in boolean_names:
                        pieces.extend((' ', attr, '="', escape(value), '"'))
                    elif value:
                        # Boolean attributes are written minimized, and
                        # only when their value is true.
                        pieces.extend((' ', attr))
                pieces.append('>')
                # An EMPTY event for an element that is not a void element
                # still needs an explicit end tag in HTML.
                if kind is EMPTY and tag not in void_elems:
                    pieces.append('</%s>' % tag)
                yield Markup(u''.join(pieces))
                if tag in verbatim_elems:
                    verbatim = True

            elif kind is END:
                yield Markup('</%s>' % data)
                verbatim = False

            elif kind is TEXT:
                if not verbatim:
                    data = escape(data, quotes=False)
                yield data

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not seen_doctype:
                _name, pubid, sysid = data
                template = ['<!DOCTYPE %s']
                if pubid:
                    template.append(' PUBLIC "%s"')
                elif sysid:
                    template.append(' SYSTEM')
                if sysid:
                    template.append(' "%s"')
                template.append('>\n')
                # Only the non-empty DOCTYPE parts are substituted in.
                yield Markup(u''.join(template), *filter(None, data))
                seen_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
Beispiel #35
0
class XMLSerializer(object):
    """Serializer that renders an event stream as XML text.
    
    >>> from libs.genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    _PRESERVE_SPACE = frozenset()

    def __init__(self,
                 doctype=None,
                 strip_whitespace=True,
                 namespace_prefixes=None):
        """Set up the XML serializer, its preamble and its filter chain.
        
        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: handed to `NamespaceFlattener` as its
                                   ``prefixes`` argument
        :note: Changed in 0.4.2: The  `doctype` parameter can now be a string.
        """
        self.preamble = preamble = []
        if doctype:
            if isinstance(doctype, basestring):
                doctype = DocType.get(doctype)
            preamble.append((DOCTYPE, doctype, (None, -1, -1)))
        self.filters = pipeline = [EmptyTagFilter()]
        if strip_whitespace:
            pipeline.append(WhitespaceFilter(self._PRESERVE_SPACE))
        pipeline.append(NamespaceFlattener(prefixes=namespace_prefixes))

    def __call__(self, stream):
        """Yield the XML output fragments for the events in `stream`.

        The preamble is emitted first and every configured filter is
        applied before serialization.  Only the first XML declaration and
        the first DOCTYPE event are written.
        """
        seen_decl = False
        seen_doctype = False
        inside_cdata = False

        events = chain(self.preamble, stream)
        for apply_filter in self.filters:
            events = apply_filter(events)

        for kind, data, _pos in events:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                pieces = ['<', tag]
                for attr, value in attrib:
                    pieces.extend((' ', attr, '="', escape(value), '"'))
                if kind is EMPTY:
                    pieces.append('/>')
                else:
                    pieces.append('>')
                yield Markup(u''.join(pieces))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through untouched.
                if not inside_cdata:
                    data = escape(data, quotes=False)
                yield data

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is XML_DECL and not seen_decl:
                version, encoding, standalone = data
                pieces = ['<?xml version="%s"' % version]
                if encoding:
                    pieces.append(' encoding="%s"' % encoding)
                # -1 means "no standalone pseudo-attribute at all".
                if standalone != -1:
                    if standalone:
                        flag = 'yes'
                    else:
                        flag = 'no'
                    pieces.append(' standalone="%s"' % flag)
                pieces.append('?>\n')
                yield Markup(u''.join(pieces))
                seen_decl = True

            elif kind is DOCTYPE and not seen_doctype:
                _name, pubid, sysid = data
                template = ['<!DOCTYPE %s']
                if pubid:
                    template.append(' PUBLIC "%s"')
                elif sysid:
                    template.append(' SYSTEM')
                if sysid:
                    template.append(' "%s"')
                template.append('>\n')
                # Only the non-empty DOCTYPE parts are substituted in.
                yield Markup(u''.join(template), *filter(None, data))
                seen_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                inside_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                inside_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
Beispiel #36
0
 def _get_permissions(self):
     '''Return the set of permission names held by this identity.

     TG difference: there are no permissions in FAS, so the result is
     always an empty frozenset.
     '''
     return frozenset(())
Beispiel #37
0
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.

import htmlentitydefs
from HTMLParser import HTMLParser, HTMLParseError
import re
try:
    frozenset
except NameError:
    from sets import ImmutableSet as frozenset
from StringIO import StringIO

# Names exported by a star-import of this module.
__all__ = ['escape', 'unescape', 'html']

# Tag names collected as empty tags.
_EMPTY_TAGS = frozenset('br hr img input'.split())
# Attribute names collected as boolean attributes.
_BOOLEAN_ATTRS = frozenset(
    ('selected checked compact declare defer disabled ismap '
     'multiple nohref noresize noshade nowrap').split())


class Markup(unicode):
    """Marks a string as being safe for inclusion in XML output without needing
    to be escaped.
    
    Strings are normally automatically escaped when added to the HDF.
    `Markup`-strings are however an exception. Use with care.
    
    (since Trac 0.9.3)
    """
Beispiel #38
0
# Pre-compiled patterns shared by the check functions.
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# Raw literal: the previous plain string relied on the invalid escape
# sequences "\w" and "\s" being passed through unchanged.  The pattern
# matches exactly the same text (the tab is now written as a regex escape).
WHITESPACE_AROUND_OPERATOR_REGEX = \
    re.compile(r'([^\w\s]*)\s*(\t|  )\s*([^\w\s]*)')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
    re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')
LAMBDA_REGEX = re.compile(r'\blambda\b')

WHITESPACE = ' \t'

BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
    '%',  '^',  '&',  '|',  '=',  '/',  '//',  '<',  '>',  '<<'])
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
                         tokenize.DEDENT, tokenize.NEWLINE])
# Every keyword (plus 'print') except the constants False, None and True.
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
                    frozenset(['False', 'None', 'True']))
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')


##############################################################################
# Plugins (check functions) for physical lines
##############################################################################

    def match(self,
              url,
              environ=None,
              sub_domains=False,
              sub_domains_ignore=None,
              domain_match=''):
        """Match a url to our regexp. 
        
        While the regexp might match, this operation isn't
        guaranteed as there's other factors that can cause a match to
        fail even though the regexp succeeds (Default that was relied
        on wasn't given, requirement regexp doesn't pass, etc.).
        
        Therefore the calling function shouldn't assume this will
        return a valid dict, the other possible return is False if a
        match doesn't work out.

        Arguments:

        ``url``
            The URL path to test against ``self.regmatch``.
        ``environ``
            Optional WSGI-style environ dict; ``HTTP_HOST`` and
            ``REQUEST_METHOD`` are read from it when relevant.
        ``sub_domains``
            When true, a sub-domain is extracted from ``HTTP_HOST`` and
            returned under the ``sub_domain`` key of the result.
        ``sub_domains_ignore``
            Container of sub-domain names that should be treated as "no
            sub-domain".  ``None`` (the default) means nothing is
            ignored.
        ``domain_match``
            Regexp fragment for the main domain.  It is interpolated
            into a pattern as-is, so it must already be escaped.
        
        """
        # Static routes don't match, they generate only
        if self.static:
            return False

        url_match = self.regmatch.match(url)
        if not url_match:
            return False

        sub_domain = None

        if sub_domains and environ and 'HTTP_HOST' in environ:
            # The default of None used to blow up in the membership test
            # below; treat it as "ignore nothing".
            if sub_domains_ignore is None:
                sub_domains_ignore = ()
            host = environ['HTTP_HOST'].split(':')[0]
            sub_match = re.compile(r'^(.+?)\.%s$' % domain_match)
            # When the pattern doesn't match, re.sub returns the host
            # unchanged, which the ``host != subdomain`` test filters out.
            subdomain = re.sub(sub_match, r'\1', host)
            if subdomain not in sub_domains_ignore and host != subdomain:
                sub_domain = subdomain

        if self.conditions:
            if 'method' in self.conditions and environ and \
                environ['REQUEST_METHOD'] not in self.conditions['method']:
                return False

            # Check sub-domains?
            use_sd = self.conditions.get('sub_domain')
            if use_sd and not sub_domain:
                return False
            elif not use_sd and 'sub_domain' in self.conditions and sub_domain:
                return False
            if isinstance(use_sd, list) and sub_domain not in use_sd:
                return False

        matchdict = url_match.groupdict()
        result = {}
        # Defaults that have no corresponding group in the regexp
        extras = self._default_keys - frozenset(matchdict.keys())
        for key, val in matchdict.items():
            if key != 'path_info' and self.encoding:
                # change back into python unicode objects from the URL
                # representation
                try:
                    val = as_unicode(val, self.encoding, self.decode_errors)
                except UnicodeDecodeError:
                    return False

            # An empty/missing group falls back to a (truthy) default
            if not val and key in self.defaults and self.defaults[key]:
                result[key] = self.defaults[key]
            else:
                result[key] = val
        for key in extras:
            result[key] = self.defaults[key]

        # Add the sub-domain if there is one
        if sub_domains:
            result['sub_domain'] = sub_domain

        # If there's a function, call it with environ and expire if it
        # returns False
        if self.conditions and 'function' in self.conditions and \
            not self.conditions['function'](environ, result):
            return False

        return result
    def buildnextreg(self, path, clist, include_names=True):
        """Recursively build our regexp given a path, and a controller
        list.

        The regexp for the remainder of the path (``path[1:]``) is built
        first by recursion; the regexp for ``path[0]`` is then combined
        with it.

        ``path``
            List of route parts.  Each part is either a literal string
            or a dict with ``'type'`` (``':'``, ``'.'`` or ``'*'``) and
            ``'name'`` keys.
        ``clist``
            Controller names; they are escaped and OR-ed together as the
            pattern for a ``controller`` variable without a requirement.
        ``include_names``
            Emit named groups (``(?P<name>...)``) when true, anonymous
            groups (``(?:...)``) otherwise.

        Returns the regular expression string, and two booleans that
        can be ignored as they're only used internally by buildnextreg.

        Side effect: ``self.prior`` is set to the current part before
        recursing into the remainder of the path.

        """
        if path:
            part = path[0]
        else:
            part = ''
        reg = ''

        # noreqs will remember whether the remainder has either a string
        # match, or a non-defaulted regexp match on a key, allblank remembers
        # if the rest could possible be completely empty
        (rest, noreqs, allblank) = ('', True, True)
        if len(path[1:]) > 0:
            self.prior = part
            (rest, noreqs,
             allblank) = self.buildnextreg(path[1:], clist, include_names)

        if isinstance(part, dict) and part['type'] in (':', '.'):
            var = part['name']
            typ = part['type']
            has_req = var in self.reqs
            has_default = var in self.defaults
            partreg = ''

            # First we plug in the proper part matcher
            if has_req:
                if include_names:
                    partreg = '(?P<%s>%s)' % (var, self.reqs[var])
                else:
                    partreg = '(?:%s)' % self.reqs[var]
                if typ == '.':
                    partreg = r'(?:\.%s)??' % partreg
            elif var == 'controller':
                controllers = '|'.join(map(re.escape, clist))
                if include_names:
                    partreg = '(?P<%s>%s)' % (var, controllers)
                else:
                    partreg = '(?:%s)' % controllers
            elif self.prior in ['/', '#']:
                if include_names:
                    partreg = '(?P<' + var + '>[^' + self.prior + ']+?)'
                else:
                    partreg = '(?:[^' + self.prior + ']+?)'
            else:
                if not rest:
                    if typ == '.':
                        exclude_chars = '/.'
                    else:
                        exclude_chars = '/'
                    if include_names:
                        partreg = '(?P<%s>[^%s]+?)' % (var, exclude_chars)
                    else:
                        partreg = '(?:[^%s]+?)' % exclude_chars
                    if typ == '.':
                        partreg = r'(?:\.%s)??' % partreg
                else:
                    # Stop this variable at the first character of what
                    # follows when that is an escaped literal, otherwise
                    # at any of the split characters
                    end = ''.join(self.done_chars)
                    rem = rest
                    if rem[0] == '\\' and len(rem) > 1:
                        rem = rem[1]
                    elif rem.startswith('(\\') and len(rem) > 2:
                        rem = rem[2]
                    else:
                        rem = end
                    rem = frozenset(rem) | frozenset(['/'])
                    if include_names:
                        partreg = '(?P<%s>[^%s]+?)' % (var, ''.join(rem))
                    else:
                        partreg = '(?:[^%s]+?)' % ''.join(rem)

            if has_req:
                noreqs = False
            if not has_default:
                allblank = False
                noreqs = False

            # Now we determine if its optional, or required. This changes
            # depending on what is in the rest of the match. If noreqs is
            # true, then its possible the entire thing is optional as there's
            # no reqs or string matches.
            if noreqs:
                # noreqs can only still be true here when this variable has
                # a default and no requirement (see the two updates just
                # above), so only those two shapes need handling.

                # If the character before this is a special char, it has to be
                # followed by this
                if self.prior in (',', ';', '.'):
                    reg = partreg + rest

                # Or we have a default with no regexp, don't touch the allblank
                else:
                    reg = partreg + '?' + rest
            # In this case, we have something dangling that might need to be
            # matched
            else:
                # If they can all be blank, and we have a default here, we know
                # its safe to make everything from here optional. Since
                # something else in the chain does have req's though, we have
                # to make the partreg here required to continue matching
                if allblank and has_default:
                    reg = '(' + partreg + rest + ')?'

                # Same as before, but they can't all be blank, so we have to
                # require it all to ensure our matches line up right
                else:
                    reg = partreg + rest
        elif isinstance(part, dict) and part['type'] == '*':
            var = part['name']
            # A wildcard always matches greedily, whatever the flags are;
            # only a wildcard without a default forces a match
            if include_names:
                reg = '(?P<%s>.*)' % var + rest
            else:
                reg = '(?:.*)' + rest
            if var not in self.defaults:
                allblank = False
                noreqs = False
        elif part and part[-1] in self.done_chars:
            if allblank:
                # Everything after the split character may be missing, so
                # make the split character itself optional along with it
                reg = re.escape(part[:-1]) + '(' + re.escape(part[-1]) + rest
                reg += ')?'
            else:
                reg = re.escape(part) + rest

        # We have a normal string here, this is a req, and it prevents us from
        # being all blank
        else:
            noreqs = False
            allblank = False
            reg = re.escape(part) + rest

        return (reg, noreqs, allblank)
Beispiel #41
0
    def __init__(self, routepath, **kargs):
        r"""Initialize a route, with a given routepath for
        matching/generation
        
        The set of keyword args will be used as defaults.
        
        Usage::
        
            >>> from routes.base import Route
            >>> newroute = Route(':controller/:action/:id')
            >>> sorted(newroute.defaults.items())
            [('action', 'index'), ('id', None)]
            >>> newroute = Route('date/:year/:month/:day',  
            ...     controller="blog", action="view")
            >>> newroute = Route('archives/:page', controller="blog", 
            ...     action="by_page", requirements = { 'page':'\d{1,2}' })
            >>> newroute.reqs
            {'page': '\\d{1,2}'}
        
        .. Note:: 
            Route is generally not called directly, a Mapper instance
            connect method should be used to add routes.

        Keyword args with a special meaning are removed from ``kargs``
        before the rest is used as defaults: ``_minimize``,
        ``_encoding``, ``_static``, ``_filter``, ``_absolute``,
        ``_member_name``, ``_collection_name``, ``_parent_resource``,
        ``conditions`` and ``_explicit``.  ``requirements`` is read but
        left in place; it is excluded from the defaults through
        ``reserved_keys``.
        
        """
        self.routepath = routepath
        self.sub_domains = False
        self.prior = None
        self.redirect = False
        self.name = None
        self.minimization = kargs.pop('_minimize', True)
        self.encoding = kargs.pop('_encoding', 'utf-8')
        self.reqs = kargs.get('requirements', {})
        self.decode_errors = 'replace'

        # Don't bother forming stuff we don't need if its a static route
        self.static = kargs.pop('_static', False)
        self.filter = kargs.pop('_filter', None)
        self.absolute = kargs.pop('_absolute', False)

        # Pull out the member/collection name if present, this applies only to
        # map.resource
        self.member_name = kargs.pop('_member_name', None)
        self.collection_name = kargs.pop('_collection_name', None)
        self.parent_resource = kargs.pop('_parent_resource', None)

        # Pull out route conditions
        self.conditions = kargs.pop('conditions', None)

        # Determine if explicit behavior should be used
        self.explicit = kargs.pop('_explicit', False)

        # reserved keys that don't count
        reserved_keys = ['requirements']

        # special chars to indicate a natural split in the URL
        self.done_chars = ('/', ',', ';', '.', '#')

        # Since static need to be generated exactly, treat them as
        # non-minimized
        if self.static:
            self.external = '://' in self.routepath
            self.minimization = False

        # Strip preceding '/' if present, but only when minimizing
        if routepath.startswith('/') and self.minimization:
            routepath = routepath[1:]

        # Build our routelist, and the keys used in the route
        self.routelist = routelist = self._pathkeys(routepath)
        routekeys = frozenset([key['name'] for key in routelist
                               if isinstance(key, dict)])

        if not self.minimization:
            self.make_full_route()

        # Build a req list with all the regexp requirements for our args;
        # each requirement is anchored so it has to match the whole value
        self.req_regs = {}
        for key, val in self.reqs.items():
            self.req_regs[key] = re.compile('^' + val + '$')
        # Update our defaults and set new default keys if needed. defaults
        # needs to be saved
        (self.defaults, defaultkeys) = self._defaults(routekeys, reserved_keys,
                                                      kargs)
        # Save the maximum keys we could utilize
        self.maxkeys = defaultkeys | routekeys

        # Populate our minimum keys, and save a copy of our backward keys for
        # quicker generation later
        (self.minkeys, self.routebackwards) = self._minkeys(routelist[:])

        # Populate our hardcoded keys, these are ones that are set and don't
        # exist in the route
        self.hardcoded = frozenset([
            key for key in self.maxkeys
            if key not in routekeys and self.defaults[key] is not None])

        # Cache our default keys
        self._default_keys = frozenset(self.defaults.keys())
Beispiel #42
0
 def validateAttributeValueDir(self, token, tagName, attrName, attrValue):
     """Yield whatever checkEnumeratedValue reports for a ``dir`` value.

     The value is checked against the two allowed keywords ``ltr`` and
     ``rtl``; a falsy result from checkEnumeratedValue yields nothing.
     """
     allowed = frozenset(('ltr', 'rtl'))
     reported = self.checkEnumeratedValue(token, tagName, attrName,
                                          attrValue, allowed)
     for item in reported or []:
         yield item
Beispiel #43
0
def _powerset(iterator):
    """Yield every non-empty subset of the items produced by `iterator`.

    `iterator` must be an iterator (not just an iterable); it is consumed
    one item per recursion level.  Subsets are yielded as frozensets.  An
    exhausted iterator yields nothing.
    """
    try:
        first = frozenset([next(iterator)])
    except StopIteration:
        # Base case of the recursion.  End the generator explicitly rather
        # than letting StopIteration escape from the generator body, which
        # is turned into a RuntimeError under PEP 479.
        return
    yield first
    for s in _powerset(iterator):
        yield s
        yield s | first
    "unexpected-end-tag-after-frameset":
    _(u"Unexpected end tag (%(name)s)"
      u" in the after frameset phase. Ignored."),
    "expected-eof-but-got-char":
    _(u"Unexpected non-space characters. Expected end of file."),
    "expected-eof-but-got-start-tag":
    _(u"Unexpected start tag (%(name)s)"
      u". Expected end of file."),
    "expected-eof-but-got-end-tag":
    _(u"Unexpected end tag (%(name)s)"
      u". Expected end of file."),
}

# Content model names mapped to their integer flag values.
contentModelFlags = dict(
    zip(("PCDATA", "RCDATA", "CDATA", "PLAINTEXT"), range(4)))

# Names of the elements classified as scoping elements.
scopingElements = frozenset(
    "button caption html marquee object table td th".split())

# Names of the elements classified as formatting elements.
formattingElements = frozenset(
    "a b big em font i nobr s small strike strong tt u".split())

# Names of the elements classified as special elements.
specialElements = frozenset((
    "address area base basefont bgsound blockquote body br center col "
    "colgroup dd dir div dl dt embed fieldset form frame frameset h1 h2 "
    "h3 h4 h5 h6 head hr iframe image img input isindex li link listing "
    "menu meta noembed noframes noscript ol optgroup option p param "
    "plaintext pre script select spacer style tbody textarea tfoot thead "
    "title tr ul wbr").split())

# The individual characters treated as space characters.
spaceCharacters = frozenset(u"\t\n\u000B\u000C \r")
Beispiel #45
0
class HTMLSanitizer(HTMLParser):
    """``HTMLParser`` subclass that writes a whitelisted copy of the markup
    it is fed to the file-like object ``out``.

    * Tags listed in ``ignore_tags`` are dropped, but their content is kept.
    * Any other tag not in ``safe_tags`` is dropped together with everything
      up to its matching end tag.
    * Attributes not in ``safe_attrs`` are dropped.  URI attributes are
      dropped when their scheme is not in ``safe_schemes``.  ``style``
      attributes are filtered declaration by declaration.
    """

    safe_tags = frozenset([
        'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote',
        'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
        'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font',
        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
        'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
        'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span',
        'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea',
        'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var'
    ])
    safe_attrs = frozenset([
        'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
        'alt', 'axis', 'border', 'bgcolor', 'cellpadding', 'cellspacing',
        'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear',
        'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir',
        'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'href',
        'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', 'longdesc',
        'maxlength', 'media', 'method', 'multiple', 'name', 'nohref',
        'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows',
        'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span',
        'src', 'start', 'style', 'summary', 'tabindex', 'target', 'title',
        'type', 'usemap', 'valign', 'value', 'vspace', 'width'
    ])
    ignore_tags = frozenset(['html', 'body'])

    uri_attrs = frozenset(
        ['action', 'background', 'dynsrc', 'href', 'lowsrc', 'src'])
    # ``None`` stands for "no scheme", i.e. a relative reference.
    safe_schemes = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    def __init__(self, out):
        """Create a sanitizer that writes its output to ``out``.

        :param out: object with a ``write`` method receiving the sanitized
                    markup
        """
        HTMLParser.__init__(self)
        self.out = out
        # Name of the unsafe tag whose end tag we are waiting for; while it
        # is set, all input is discarded.
        self.waiting_for = None

    def handle_starttag(self, tag, attrs):
        """Write a safe start tag with its safe attributes, or start
        discarding input if the tag is not whitelisted."""
        if self.waiting_for:
            return
        if tag in self.ignore_tags:
            return

        if tag not in self.safe_tags:
            self.waiting_for = tag
            return
        self.out.write('<' + tag)

        def _get_scheme(text):
            # HTMLParser reports valueless attributes as None; treat them
            # (and empty strings) as having no scheme instead of crashing.
            if not text or ':' not in text:
                return None
            # Keep only alphanumerics so obfuscated schemes such as
            # "java\tscript:" are still recognised.
            chars = [char for char in text.split(':', 1)[0] if char.isalnum()]
            return ''.join(chars).lower()

        for attrname, attrval in attrs:
            if attrname not in self.safe_attrs:
                continue
            elif attrname in self.uri_attrs:
                # Don't allow URI schemes such as "javascript:"
                if _get_scheme(attrval) not in self.safe_schemes:
                    continue
            elif attrname == 'style':
                # Remove dangerous CSS declarations from inline styles
                decls = []
                for decl in filter(None, (attrval or '').split(';')):
                    # CSS keywords and function names are case-insensitive,
                    # so match against a lower-cased copy.
                    lowered = decl.lower()
                    is_evil = 'expression' in lowered
                    if not is_evil:
                        for m in re.finditer(r'url\s*\(([^)]+)', lowered):
                            if _get_scheme(m.group(1)) not in self.safe_schemes:
                                is_evil = True
                                break
                    if not is_evil:
                        decls.append(decl.strip())
                if not decls:
                    continue
                attrval = '; '.join(decls)
            self.out.write(' ' + attrname + '="' + escape(attrval) + '"')

        if tag in _EMPTY_TAGS:
            self.out.write(' />')
        else:
            self.out.write('>')

    def handle_entityref(self, name):
        """Pass named entity references through unchanged."""
        if not self.waiting_for:
            self.out.write('&%s;' % name)

    def handle_data(self, data):
        """Write text content, escaped, unless input is being discarded."""
        if not self.waiting_for:
            self.out.write(escape(data, quotes=False))

    def handle_endtag(self, tag):
        """Write the end tag, or stop discarding input when the end tag of
        the unsafe element is reached."""
        if tag in self.ignore_tags:
            return

        if self.waiting_for:
            if self.waiting_for == tag:
                self.waiting_for = None
            return
        # Empty tags were already closed with " />" in handle_starttag.
        if tag not in _EMPTY_TAGS:
            self.out.write('</' + tag + '>')
Beispiel #46
0
    _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."),
    "invalid-scheme":
    _(u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>."
      ),
    "invalid-rel":
    _(u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>."
      ),
    "invalid-mime-type":
    _(u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>."),
})

globalAttributes = frozenset([
    # General-purpose attributes
    'class', 'contenteditable', 'contextmenu', 'dir', 'draggable', 'id',
    'irrelevant', 'lang', 'ref', 'tabindex', 'template', 'title',
] + [
    # Event handler attributes
    'onabort', 'onbeforeunload', 'onblur', 'onchange', 'onclick',
    'oncontextmenu', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter',
    'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror',
    'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmessage',
    'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
    'onmousewheel', 'onresize', 'onscroll', 'onselect', 'onsubmit',
    'onunload',
])
# XXX lang in HTML only, xml:lang in XHTML only
# XXX validate ref, template

allowedAttributeMap = {
    'html':
    frozenset(('xmlns', )),
    'head':
    frozenset(()),
    'title':
    frozenset(()),
    'base':
Beispiel #47
0
    def generate(self, *args, **kargs):
        """Generate a route from a set of keywords

        Returns the url text, or None if no URL could be generated.

        .. code-block:: python

            m.generate(controller='content',action='view',id=10)

        Positional ``args``, when given, are used directly as the list of
        candidate routes instead of the routes looked up by
        controller/action.  Unless the mapper is ``explicit``, missing
        ``controller`` and ``action`` keywords default to ``'content'`` and
        ``'index'``.  When ``self.urlcache`` is not None, results are read
        from and stored in it.
        """
        # Generate ourself if we haven't already
        if not self._created_gens:
            self._create_gens()

        if self.append_slash:
            kargs['_append_slash'] = True

        if not self.explicit:
            if 'controller' not in kargs:
                kargs['controller'] = 'content'
            if 'action' not in kargs:
                kargs['action'] = 'index'

        controller = kargs.get('controller', None)
        action = kargs.get('action', None)

        # If the URL didn't depend on the SCRIPT_NAME, we'll cache it
        # keyed just by kargs; otherwise we need to cache it with
        # both SCRIPT_NAME and kargs:
        cache_key = unicode(args).encode('utf8') + \
            unicode(kargs).encode('utf8')

        if self.urlcache is not None:
            if self.environ:
                cache_key_script_name = '%s:%s' % (self.environ.get(
                    'SCRIPT_NAME', ''), cache_key)
            else:
                cache_key_script_name = cache_key

            # Check the url cache to see if it exists, use it if it does
            for key in [cache_key, cache_key_script_name]:
                if key in self.urlcache:
                    return self.urlcache[key]

        actionlist = self._gendict.get(controller) or self._gendict.get(
            '*', {})
        if not actionlist and not args:
            return None
        (keylist, sortcache) = actionlist.get(action) or \
                               actionlist.get('*', (None, {}))
        if not keylist and not args:
            return None

        keys = frozenset(kargs.keys())
        cachekey = unicode(keys)
        cachelist = sortcache.get(cachekey)
        if args:
            keylist = args
        elif cachelist:
            keylist = cachelist
        else:
            # Keep only the routes whose required keys are all supplied
            newlist = []
            for route in keylist:
                if len(route.minkeys - keys) == 0:
                    newlist.append(route)
            keylist = newlist

            def keysort(a, b):
                """Sorts two sets of sets, to order them ideally for
                matching."""
                a = a.maxkeys
                b = b.maxkeys

                lendiffa = len(keys ^ a)
                lendiffb = len(keys ^ b)
                # If they both match, don't switch them
                if lendiffa == 0 and lendiffb == 0:
                    return 0

                # First, if a matches exactly, use it
                if lendiffa == 0:
                    return -1

                # Or b matches exactly, use it
                if lendiffb == 0:
                    return 1

                # Neither matches exactly, return the one with the most in
                # common
                if cmp(lendiffa, lendiffb) != 0:
                    return cmp(lendiffa, lendiffb)

                # Neither matches exactly, but if they both have just as much
                # in common
                if len(keys & b) == len(keys & a):
                    # Then we return the shortest of the two
                    return cmp(len(a), len(b))

                # Otherwise, we return the one that has the most in common
                else:
                    return cmp(len(keys & b), len(keys & a))

            keylist.sort(keysort)
            # Remember the sorted candidates for this exact set of keys
            sortcache[cachekey] = keylist

        # Iterate through the keylist of sorted routes (or a single route if
        # it was passed in explicitly for hardcoded named routes)
        for route in keylist:
            fail = False
            for key in route.hardcoded:
                kval = kargs.get(key)
                if not kval:
                    continue
                if isinstance(kval, str):
                    kval = kval.decode(self.encoding)
                else:
                    kval = unicode(kval)
                if kval != route.defaults[key]:
                    fail = True
                    break
            if fail:
                continue
            path = route.generate(**kargs)
            if not path:
                continue
            if self.prefix:
                path = self.prefix + path
            external_static = route.static and route.external
            uses_script_name = (
                self.environ and
                self.environ.get('SCRIPT_NAME', '') != '' and
                not route.absolute and not external_static)
            if uses_script_name:
                path = self.environ['SCRIPT_NAME'] + path
            if self.urlcache is not None:
                # cache_key_script_name only exists when the cache is
                # enabled, so it must only be read inside this branch;
                # reading it unconditionally raised UnboundLocalError when
                # urlcache was None and SCRIPT_NAME was set.
                if uses_script_name:
                    key = cache_key_script_name
                else:
                    key = cache_key
                self.urlcache[key] = str(path)
            return str(path)
        return None
Beispiel #48
0
from html5lib.constants import scopingElements, tableInsertModeElements, namespaces
try:
    frozenset
except NameError:
    # Import from the sets module for python 2.3
    from sets import Set as set
    from sets import ImmutableSet as frozenset

# Scope markers are inserted on entering object elements, marquees, table
# cells and table captions; they stop formatting from "leaking" into tables,
# object elements and marquees.
Marker = None

# Maps a variant key to a ``(element set, flag)`` pair.  Elements are
# ``(namespace, name)`` tuples.
# NOTE(review): the meaning of the boolean flag is not visible in this
# snippet -- confirm against the code that consumes this map.
listElementsMap = dict([
    (None,
     (frozenset(scopingElements), False)),
    ("button",
     (frozenset(scopingElements | set([(namespaces["html"], "button")])),
      False)),
    ("list",
     (frozenset(scopingElements | set([(namespaces["html"], "ol"),
                                       (namespaces["html"], "ul")])),
      False)),
    ("table",
     (frozenset([(namespaces["html"], "html"),
                 (namespaces["html"], "table")]),
      False)),
    ("select",
     (frozenset([(namespaces["html"], "optgroup"),
                 (namespaces["html"], "option")]),
      True)),
])


class Node(object):
    def __init__(self, name):
Beispiel #49
0
    def closeTagOpenState(self):
        """Handle the tokenizer state entered after "</" has been consumed.

        In the RCDATA and CDATA content models the end tag only counts when
        it matches the name of the current token and is followed by a space
        character, ">", "/", "<" or EOF; otherwise "</" is emitted as
        character data and the tokenizer returns to the data state.  In all
        other cases the next character decides which state comes next.
        Always returns True.
        """
        rawTextModels = (contentModelFlags["RCDATA"],
                         contentModelFlags["CDATA"])
        if self.contentModelFlag in rawTextModels:
            closesCurrentTag = False
            if self.currentToken:
                # "</" has been consumed.  Peek at as many characters as the
                # current token's name has, plus the one character that
                # follows them.
                lookahead = []
                for _i in xrange(len(self.currentToken["name"]) + 1):
                    char = self.stream.char()
                    lookahead.append(char)
                    # Stop reading once the input is exhausted
                    if char == EOF:
                        break

                # This was only a peek: push everything back on the stream.
                self.stream.unget(lookahead)

                closesCurrentTag = (
                    self.currentToken["name"].lower() ==
                    "".join(lookahead[:-1]).lower()
                    and lookahead[-1] in (spaceCharacters |
                                          frozenset((u">", u"/", u"<", EOF))))

            if not closesCurrentTag:
                self.tokenQueue.append({"type": "Characters", "data": u"</"})
                self.state = self.states["data"]
                # Nothing below applies in this case.
                return True

            # The characters match, so it is safe to switch to PCDATA now
            # rather than when the end tag token is emitted.
            self.contentModelFlag = contentModelFlags["PCDATA"]

        data = self.stream.char()
        if data in asciiLetters:
            self.currentToken = {"type": "EndTag", "name": data, "data": []}
            self.state = self.states["tagName"]
        elif data == u">":
            error = {"type": "ParseError",
                     "data": "expected-closing-tag-but-got-right-bracket"}
            self.tokenQueue.append(error)
            self.state = self.states["data"]
        elif data == EOF:
            error = {"type": "ParseError",
                     "data": "expected-closing-tag-but-got-eof"}
            self.tokenQueue.append(error)
            self.tokenQueue.append({"type": "Characters", "data": u"</"})
            self.state = self.states["data"]
        else:
            # XXX data can be _'_...
            error = {"type": "ParseError",
                     "data": "expected-closing-tag-but-got-char",
                     "datavars": {"data": data}}
            self.tokenQueue.append(error)
            self.stream.unget(data)
            self.state = self.states["bogusComment"]
        return True
Beispiel #50
0
"""
# Last synced with Rails copy at Revision 6057 on Feb 9th, 2007.

import sys
if sys.version < '2.4':
    from sets import ImmutableSet as frozenset

from javascript import *
from javascript import options_for_javascript
from form_tag import form
from tags import tag, camelize
from urls import get_url

# Names accepted as Ajax callbacks: the request life-cycle events plus
# every HTTP status code from 100 through 599 (inclusive) as a string.
# The upper bound is 600 because range() excludes its end value.
CALLBACKS = frozenset(['uninitialized', 'loading', 'loaded',
                       'interactive', 'complete', 'failure', 'success'] +
                      [str(x) for x in range(100, 600)])
# Every recognised Ajax option name, including all callback names.
AJAX_OPTIONS = frozenset(['before', 'after', 'condition', 'url',
                          'asynchronous', 'method', 'insertion', 'position',
                          'form', 'with', 'with_', 'update', 'script'] +
                         list(CALLBACKS))


def link_to_remote(name, options=None, **html_options):
    """
    Links to a remote function.
    
    Returns a link to a remote action defined ``dict(url=url())``
    (using the url() format) that's called in the background using 
    XMLHttpRequest. The result of that request can then be inserted into 
    a DOM object whose id can be specified with the ``update`` keyword. 
Beispiel #51
0
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.
    
    >>> from libs.genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XHTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    # Elements written as "<tag />" when empty; any other empty element is
    # written as "<tag></tag>".
    _EMPTY_ELEMS = frozenset([
        'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
        'isindex', 'link', 'meta', 'param'
    ])
    # Attributes whose value is replaced by the attribute name itself.
    _BOOLEAN_ATTRS = frozenset([
        'selected', 'checked', 'compact', 'declare', 'defer', 'disabled',
        'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap'
    ])
    # Elements handed to the whitespace filter as "preserve" elements.
    _PRESERVE_SPACE = frozenset([
        QName('pre'),
        QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'),
        QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self,
                 doctype=None,
                 strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XHTML serializer.

        :param doctype: passed through to the ``XMLSerializer`` initializer
        :param strip_whitespace: whether a ``WhitespaceFilter`` should be
                                 added to the filter chain
        :param namespace_prefixes: optional mapping of namespace URIs to
                                   prefixes; the XHTML namespace is always
                                   mapped to the empty prefix
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so that the mapping passed in by the caller is not
        # modified as a side effect of constructing the serializer.
        namespace_prefixes = dict(namespace_prefixes or {})
        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))

    def __call__(self, stream):
        """Serialize ``stream`` and yield the resulting pieces of markup.

        The preamble is chained in front of the stream and all filters are
        applied before the events are turned into text.
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        have_doctype = False
        in_cdata = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # e.g. noshade="noshade"
                        value = attr
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield Markup(u''.join(buf))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is emitted unescaped
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            # Only the first DOCTYPE event is written out
            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # Only the non-empty parts of the doctype fill the
                # placeholders collected above.
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
Beispiel #52
0
class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.
    
    For example, assume the following template:
    
    >>> from genshi.template import MarkupTemplate
    >>> 
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...   </body>
    ... </html>''', filename='example.html')
    
    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:
    
    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>> 
    >>> translator = Translator(pseudo_gettext)
    
    Next, the translator needs to be prepended to any already defined filters
    on the template:
    
    >>> tmpl.filters.insert(0, translator)
    
    When generating the template output, our hard-coded translations should be
    applied as expected:
    
    >>> print tmpl.generate(username='******', _=pseudo_gettext)
    <html>
      <head>
        <title>Beispiel</title>
      </head>
      <body>
        <h1>Beispiel</h1>
        <p>Hallo, Hans</p>
      </body>
    </html>

    Note that elements defining ``xml:lang`` attributes that do not contain
    variable expressions are ignored by this filter. That can be used to
    exclude specific parts of a template from being extracted and translated.
    """

    # Elements whose content is never localized (with and without the XHTML
    # namespace).
    IGNORE_TAGS = frozenset([
        QName('script'),
        QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'),
        QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose plain-string values are localized.
    INCLUDE_ATTRS = frozenset(
        ['abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'])

    def __init__(self,
                 translate=gettext,
                 ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS,
                 extract_text=True):
        """Initialize the translator.
        
        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be
                              localized
        :param extract_text: whether the content of text nodes should be
                             extracted, or only text in explicit ``gettext``
                             function calls
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs
        self.extract_text = extract_text

    def __call__(self, stream, ctxt=None, search_text=True):
        """Translate any localizable strings in the given stream.
        
        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.
        
        :param stream: the markup event stream
        :param ctxt: the template context (not used)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        if not self.extract_text:
            search_text = False
        # Nesting depth inside an element that must not be localized;
        # zero means events are currently being localized.
        skip = 0
        xml_lang = XML_NAMESPACE['lang']

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                # Track nesting so that the END event matching the element
                # that started the skip brings the counter back to zero.
                if kind is START:
                    skip += 1
                elif kind is END:
                    skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                # Ignored tags, and elements with a plain-string xml:lang
                # attribute, are passed through untouched with their content.
                if tag in self.ignore_tags or \
                        isinstance(attrs.get(xml_lang), basestring):
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if search_text and isinstance(value, basestring):
                        if name in include_attrs:
                            newval = self.translate(value)
                    else:
                        # Otherwise the value is run through this filter
                        # again as a stream, with text translation disabled.
                        newval = list(
                            self(_ensure(value), ctxt, search_text=False))
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                # Only replace the attributes if at least one value changed
                if changed:
                    attrs = new_attrs

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                # Translate the stripped text but substitute it back into
                # the original data, keeping the surrounding whitespace.
                text = data.strip()
                if text:
                    data = data.replace(text, translate(text))
                yield kind, data, pos

            elif kind is SUB:
                # Localize the nested stream recursively
                subkind, substream = data
                new_substream = list(self(substream, ctxt))
                yield kind, (subkind, new_substream), pos

            else:
                yield kind, data, pos

    # Default names of the functions treated as gettext-style calls by
    # `extract`.
    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self,
                stream,
                gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True):
        """Extract localizable strings from the given template stream.
        
        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:
        
        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        *  ``message`` is the string itself (a ``unicode`` object, or a tuple
           of ``unicode`` objects for functions with multiple string arguments).
        
        >>> from genshi.template import MarkupTemplate
        >>> 
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ...   <head>
        ...     <title>Example</title>
        ...   </head>
        ...   <body>
        ...     <h1>Example</h1>
        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ...   </body>
        ... </html>''', filename='example.html')
        >>> 
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items')
        
        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)
        
        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead of an item for each string argument.
        """
        tagname = None
        if not self.extract_text:
            search_text = False
        # Nesting depth inside an element whose text must not be extracted
        skip = 0
        xml_lang = XML_NAMESPACE['lang']

        for kind, data, pos in stream:

            # Unlike in __call__, skipped events still fall through to the
            # branches below, so EXPR/EXEC and SUB events are processed even
            # inside a skipped element.
            if skip:
                if kind is START:
                    skip += 1
                if kind is END:
                    skip -= 1

            if kind is START and not skip:
                tag, attrs = data
                if tag in self.ignore_tags or \
                        isinstance(attrs.get(xml_lang), basestring):
                    skip += 1
                    continue

                for name, value in attrs:
                    if search_text and isinstance(value, basestring):
                        if name in self.include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        # Otherwise the value is scanned as a stream, with
                        # plain-text extraction disabled.
                        for lineno, funcname, text in self.extract(
                                _ensure(value),
                                gettext_functions,
                                search_text=False):
                            yield lineno, funcname, text

            elif not skip and search_text and kind is TEXT:
                text = data.strip()
                # Only report text containing at least one alphabetic
                # character.
                if text and filter(None, [ch.isalpha() for ch in text]):
                    yield pos[1], None, text

            elif kind is EXPR or kind is EXEC:
                # Map every constant of the code object to a two-character
                # string: its index as a character followed by NUL.
                # NOTE(review): presumably this mirrors how the bytecode
                # encodes a constant index as an opcode argument -- confirm
                # for the targeted Python version.
                consts = dict([(n, chr(i) + '\x00')
                               for i, n in enumerate(data.code.co_consts)])
                # Encoded positions of the gettext function names that occur
                # among the constants.
                gettext_locs = [
                    consts[n] for n in gettext_functions if n in consts
                ]
                # Regular expression applied to the raw bytecode: _LOAD_CONST
                # with one of the gettext locations (group 1), _CALL_FUNCTION
                # with a two-character argument, then one or more
                # _BINARY_ADD or _LOAD_CONST-with-argument items (group 2).
                # NOTE(review): gettext_locs are inserted without
                # re.escape(); verify that none can contain regex
                # metacharacters.
                ops = [
                    _LOAD_CONST, '(', '|'.join(gettext_locs), ')',
                    _CALL_FUNCTION, '.\x00', '((?:', _BINARY_ADD, '|',
                    _LOAD_CONST, '.\x00)+)'
                ]
                for loc, opcodes in re.findall(''.join(ops),
                                               data.code.co_code):
                    funcname = data.code.co_consts[ord(loc[0])]
                    strings = []
                    opcodes = iter(opcodes)
                    for opcode in opcodes:
                        if opcode == _BINARY_ADD:
                            # Concatenate the two most recent strings
                            arg = strings.pop()
                            strings[-1] += arg
                        else:
                            # The next character is the index of the constant
                            arg = data.code.co_consts[ord(opcodes.next())]
                            opcodes.next()  # skip second byte
                            # Stop at the first non-string constant
                            if not isinstance(arg, basestring):
                                break
                            strings.append(unicode(arg))
                    # A single string is yielded as-is, several as a tuple
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield pos[1], funcname, strings

            elif kind is SUB:
                # Recurse into the nested stream; plain text is only
                # extracted there when not inside a skipped element.
                subkind, substream = data
                messages = self.extract(substream,
                                        gettext_functions,
                                        search_text=search_text and not skip)
                for lineno, funcname, text in messages:
                    yield lineno, funcname, text
Beispiel #53
0
class DB(object):
    """Wrapper class for the _pg connection type."""
    def __init__(self, *args, **kw):
        """Create a new connection.

        You can pass either the connection parameters or an existing
        _pg or pgdb connection. This allows you to use the methods
        of the classic pg interface with a DB-API 2 pgdb connection.

        """
        if not args and len(kw) == 1:
            db = kw.get('db')
        elif not kw and len(args) == 1:
            db = args[0]
        else:
            db = None
        if db:
            if isinstance(db, DB):
                db = db.db
            else:
                try:
                    db = db._cnx
                except AttributeError:
                    pass
        if not db or not hasattr(db, 'db') or not hasattr(db, 'query'):
            db = connect(*args, **kw)
            self._closeable = True
        else:
            self._closeable = False
        self.db = db
        self.dbname = db.db
        self._regtypes = False
        self._attnames = {}
        self._pkeys = {}
        self._privileges = {}
        self._args = args, kw
        self.debug = None  # For debugging scripts, this can be set
        # * to a string format specification (e.g. in CGI set to "%s<BR>"),
        # * to a file object to write debug statements or
        # * to a callable object which takes a string argument
        # * to any other true value to just print debug statements

    def __getattr__(self, name):
        # All undefined members are same as in underlying pg connection:
        if self.db:
            return getattr(self.db, name)
        else:
            raise _int_error('Connection is not valid')

    # Context manager methods

    def __enter__(self):
        """Enter the runtime context. This will start a transaction."""
        self.begin()
        return self

    def __exit__(self, et, ev, tb):
        """Exit the runtime context. This will end the transaction."""
        if et is None and ev is None and tb is None:
            self.commit()
        else:
            self.rollback()

    # Auxiliary methods

    def _do_debug(self, s):
        """Print a debug message."""
        if self.debug:
            if isinstance(self.debug, basestring):
                print self.debug % s
            elif isinstance(self.debug, file):
                file.write(s + '\n')
            elif callable(self.debug):
                self.debug(s)
            else:
                print s

    def _quote_text(self, d):
        """Quote text value."""
        if not isinstance(d, basestring):
            d = str(d)
        return "'%s'" % self.escape_string(d)

    # Lowercase string spellings that _quote_bool treats as boolean true.
    _bool_true = frozenset('t true 1 y yes on'.split())

    def _quote_bool(self, d):
        """Quote boolean value."""
        if isinstance(d, basestring):
            if not d:
                return 'NULL'
            d = d.lower() in self._bool_true
        else:
            d = bool(d)
        return ("'f'", "'t'")[d]

    # Date/time keywords that _quote_date passes through unquoted
    # (matched against the lowercased value).
    _date_literals = frozenset(
        'current_date current_time'
        ' current_timestamp localtime localtimestamp'.split())

    def _quote_date(self, d):
        """Quote date value."""
        if not d:
            return 'NULL'
        if isinstance(d, basestring) and d.lower() in self._date_literals:
            return d
        return self._quote_text(d)

    def _quote_num(self, d):
        """Quote numeric value."""
        if not d and d != 0:
            return 'NULL'
        return str(d)

    def _quote_money(self, d):
        """Quote money value."""
        if d is None or d == '':
            return 'NULL'
        if not isinstance(d, basestring):
            d = str(d)
        return d

    # Dispatch table used by _quote: maps a simplified type name to the
    # function that renders a value of that type. The entries are the plain
    # functions defined above, so _quote passes ``self`` explicitly; type
    # names missing from this table are handled by the 'text' entry.
    _quote_funcs = dict(  # quote methods for each type
        text=_quote_text,
        bool=_quote_bool,
        date=_quote_date,
        int=_quote_num,
        num=_quote_num,
        float=_quote_num,
        money=_quote_money)

    def _quote(self, d, t):
        """Return quotes if needed."""
        if d is None:
            return 'NULL'
        try:
            quote_func = self._quote_funcs[t]
        except KeyError:
            quote_func = self._quote_funcs['text']
        return quote_func(self, d)

    def _split_schema(self, cl):
        """Return the schema and the bare name of a class as a pair.

        A qualified name (schema.table or database.schema.table) is just
        split. For an unqualified name the schemas of the current search
        path are queried for a class of that name and the first match in
        path order wins; 'public' is assumed if nothing is found.

        """
        parts = _split_parts(cl)
        if len(parts) > 1:
            # Already qualified: only the last two parts are of interest.
            if len(parts) > 3:
                raise _prg_error('Too many dots in class name %s' % cl)
            schema, cl = parts[-2:]
            return schema, cl
        cl = parts[0]
        # The search path comes back as an array literal like "{a,b}";
        # strip the braces and split it into the schema names.
        q = 'SELECT current_schemas(TRUE)'
        schemas = self.db.query(q).getresult()[0][0][1:-1].split(',')
        if not schemas:  # empty path
            return 'public', cl
        # Build an inline table of (position, schema) rows so that the
        # database can pick the first schema of the path holding cl.
        numbered = []
        for pos, name in enumerate(schemas):
            numbered.append(
                "SELECT %d::integer AS n, '%s'::name AS nspname"
                % (pos, name))
        q = ' UNION '.join(numbered)
        q = ("SELECT nspname FROM pg_class"
             " JOIN pg_namespace"
             " ON pg_class.relnamespace = pg_namespace.oid"
             " JOIN (%s) AS p USING (nspname)"
             " WHERE pg_class.relname = '%s'"
             " ORDER BY n LIMIT 1" % (q, cl))
        rows = self.db.query(q).getresult()
        # Fall back to 'public' if the class is not on the search path.
        schema = rows[0][0] if rows else 'public'
        return schema, cl

    def _add_schema(self, cl):
        """Return the class name cl qualified with its schema name."""
        schema_and_name = self._split_schema(cl)
        return _join_parts(schema_and_name)

    # Public methods

    # escape_string and escape_bytea exist as methods, so the
    # unescape_bytea function is exposed as a (static) method as well
    # to keep the interface uniform.
    unescape_bytea = staticmethod(unescape_bytea)

    def close(self):
        """Close the database connection."""
        # Wraps shared library function so we can track state.
        if self._closeable:
            if self.db:
                self.db.close()
                self.db = None
            else:
                raise _int_error('Connection already closed')

    def reset(self):
        """Reset connection with current parameters.

        All derived queries and large objects derived from this connection
        will not be usable after this call.

        """
        if self.db:
            self.db.reset()
        else:
            raise _int_error('Connection already closed')

    def reopen(self):
        """Reopen connection to the database.

        Used in case we need another connection to the same database.
        Note that we can still reopen a database that we have closed.

        """
        # There is no such shared library function.
        if self._closeable:
            db = connect(*self._args[0], **self._args[1])
            if self.db:
                self.db.close()
            self.db = db

    def begin(self, mode=None):
        """Begin a transaction."""
        qstr = 'BEGIN'
        if mode:
            qstr += ' ' + mode
        return self.query(qstr)

    # start() is an alias for begin()
    start = begin

    def commit(self):
        """Commit the current transaction."""
        return self.query('COMMIT')

    # end() is an alias for commit()
    end = commit

    def rollback(self, name=None):
        """Rollback the current transaction."""
        qstr = 'ROLLBACK'
        if name:
            qstr += ' TO ' + name
        return self.query(qstr)

    def savepoint(self, name=None):
        """Define a new savepoint within the current transaction."""
        qstr = 'SAVEPOINT'
        if name:
            qstr += ' ' + name
        return self.query(qstr)

    def release(self, name):
        """Destroy a previously defined savepoint."""
        return self.query('RELEASE ' + name)

    def query(self, qstr, *args):
        """Executes a SQL command string.

        This method simply sends a SQL query to the database. If the query is
        an insert statement that inserted exactly one row into a table that
        has OIDs, the return value is the OID of the newly inserted row.
        If the query is an update or delete statement, or an insert statement
        that did not insert exactly one row in a table with OIDs, then the
        numer of rows affected is returned as a string. If it is a statement
        that returns rows as a result (usually a select statement, but maybe
        also an "insert/update ... returning" statement), this method returns
        a pgqueryobject that can be accessed via getresult() or dictresult()
        or simply printed. Otherwise, it returns `None`.

        The query can contain numbered parameters of the form $1 in place
        of any data constant. Arguments given after the query string will
        be substituted for the corresponding numbered parameter. Parameter
        values can also be given as a single list or tuple argument.

        Note that the query string must not be passed as a unicode value,
        but you can pass arguments as unicode values if they can be decoded
        using the current client encoding.

        """
        # Wraps shared library function for debugging.
        if not self.db:
            raise _int_error('Connection is not valid')
        self._do_debug(qstr)
        return self.db.query(qstr, args)

    def pkey(self, cl, newpkey=None):
        """This method gets or sets the primary key of a class.

        Composite primary keys are represented as frozensets. Note that
        this raises an exception if the table does not have a primary key.

        If newpkey is set and is not a dictionary then set that
        value as the primary key of the class.  If it is a dictionary
        then replace the _pkeys dictionary with a copy of it.

        """
        # First see if the caller is supplying a dictionary
        if isinstance(newpkey, dict):
            # make sure that all classes have a namespace
            self._pkeys = dict([('.' in cl and cl or 'public.' + cl, pkey)
                                for cl, pkey in newpkey.iteritems()])
            return self._pkeys

        qcl = self._add_schema(cl)  # build fully qualified class name
        # Check if the caller is supplying a new primary key for the class
        if newpkey:
            self._pkeys[qcl] = newpkey
            return newpkey

        # Get all the primary keys at once
        if qcl not in self._pkeys:
            # if not found, check again in case it was added after we started
            self._pkeys = {}
            if self.server_version >= 80200:
                # the ANY syntax works correctly only with PostgreSQL >= 8.2
                any_indkey = "= ANY (pg_index.indkey)"
            else:
                any_indkey = "IN (%s)" % ', '.join(
                    ['pg_index.indkey[%d]' % i for i in range(16)])
            for r in self.db.query(
                    "SELECT pg_namespace.nspname, pg_class.relname,"
                    " pg_attribute.attname FROM pg_class"
                    " JOIN pg_namespace"
                    " ON pg_namespace.oid = pg_class.relnamespace"
                    " AND pg_namespace.nspname NOT LIKE 'pg_%'"
                    " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid"
                    " AND pg_attribute.attisdropped = 'f'"
                    " JOIN pg_index ON pg_index.indrelid = pg_class.oid"
                    " AND pg_index.indisprimary = 't'"
                    " AND pg_attribute.attnum " + any_indkey).getresult():
                cl, pkey = _join_parts(r[:2]), r[2]
                self._pkeys.setdefault(cl, []).append(pkey)
            # (only) for composite primary keys, the values will be frozensets
            for cl, pkey in self._pkeys.iteritems():
                self._pkeys[cl] = len(pkey) > 1 and frozenset(pkey) or pkey[0]
            self._do_debug(self._pkeys)

        # will raise an exception if primary key doesn't exist
        return self._pkeys[qcl]

    def get_databases(self):
        """Get list of databases in the system."""
        return [
            s[0] for s in self.db.query(
                'SELECT datname FROM pg_database').getresult()
        ]

    def get_relations(self, kinds=None):
        """Get list of relations in connected database of specified kinds.

        If kinds is None or empty, all kinds of relations are returned.
        Otherwise kinds can be a string or sequence of type letters
        specifying which kind of relations you want to list.

        The names are returned schema-qualified, ordered by schema and
        relation name; names starting with 'Inv' or 'pg_' are left out.

        """
        if kinds:
            quoted_kinds = ','.join(["'%s'" % kind for kind in kinds])
            where = "pg_class.relkind IN (%s) AND" % quoted_kinds
        else:
            where = ''
        rows = self.db.query(
            "SELECT pg_namespace.nspname, pg_class.relname "
            "FROM pg_class "
            "JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace "
            "WHERE %s pg_class.relname !~ '^Inv' AND "
            "pg_class.relname !~ '^pg_' "
            "ORDER BY 1, 2" % where).getresult()
        return [_join_parts(row) for row in rows]

    def get_tables(self):
        """Return list of tables in connected database."""
        return self.get_relations('r')

    def get_attnames(self, cl, newattnames=None):
        """Return the attribute names and types of a table or view.

        The result is a dictionary with the attribute names as keys and
        the names of the attributes' types as values; it is cached per
        class. If the optional newattnames is given, it must be a
        dictionary; it then replaces the whole cache and nothing is
        returned.

        By default, only a limited number of simple types will be returned.
        You can get the regular types after calling use_regtypes(True).

        """
        if isinstance(newattnames, dict):
            self._attnames = newattnames
            return
        if newattnames:
            raise _prg_error('If supplied, newattnames must be a dictionary')
        parts = self._split_schema(cl)  # (schema, class) pair
        qcl = _join_parts(parts)  # fully qualified name
        # Serve from the cache whenever possible.
        if qcl in self._attnames:
            return self._attnames[qcl]
        if qcl not in self.get_relations('rv'):
            raise _prg_error('Class %s does not exist' % qcl)

        q = "SELECT pg_attribute.attname, pg_type.typname"
        if self._regtypes:
            q += "::regtype"
        q += (" FROM pg_class"
              " JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid"
              " JOIN pg_attribute ON pg_attribute.attrelid = pg_class.oid"
              " JOIN pg_type ON pg_type.oid = pg_attribute.atttypid"
              " WHERE pg_namespace.nspname = '%s' AND pg_class.relname = '%s'"
              " AND (pg_attribute.attnum > 0 OR pg_attribute.attname = 'oid')"
              " AND pg_attribute.attisdropped = 'f'") % parts
        rows = self.db.query(q).getresult()

        if self._regtypes:
            types = dict(rows)
        else:
            # Type name prefix -> simplified type; the first match wins,
            # so 'interval' has to be listed before 'int'.
            simple_types = (
                ('bool', 'bool'),
                ('abstime', 'date'),
                ('date', 'date'),
                ('interval', 'date'),
                ('timestamp', 'date'),
                ('oid', 'int'),
                ('int', 'int'),
                ('float', 'float'),
                ('numeric', 'num'),
                ('money', 'money'),
            )
            types = {}
            for att, typ in rows:
                for prefix, simple in simple_types:
                    if typ.startswith(prefix):
                        typ = simple
                        break
                else:
                    typ = 'text'  # everything else is treated as text
                types[att] = typ

        self._attnames[qcl] = types  # cache it
        return self._attnames[qcl]

    def use_regtypes(self, regtypes=None):
        """Use regular type names instead of simplified type names."""
        if regtypes is None:
            return self._regtypes
        else:
            regtypes = bool(regtypes)
            if regtypes != self._regtypes:
                self._regtypes = regtypes
                self._attnames.clear()
            return regtypes

    def has_table_privilege(self, cl, privilege='select'):
        """Check whether current user has specified table privilege."""
        qcl = self._add_schema(cl)
        privilege = privilege.lower()
        try:
            return self._privileges[(qcl, privilege)]
        except KeyError:
            q = "SELECT has_table_privilege('%s', '%s')" % (qcl, privilege)
            ret = self.db.query(q).getresult()[0][0] == 't'
            self._privileges[(qcl, privilege)] = ret
            return ret

    def get(self, cl, arg, keyname=None):
        """Get a single row from a database table or view.

        This method is the basic mechanism to get a single row. The
        keyname names the column (or, for a composite key, the sequence
        of columns) that identifies the row; if it is not specified, the
        primary key of the table is used. If arg is a dictionary, the key
        values are taken from it and it is updated with the values of the
        row that was found, replacing existing values where necessary;
        otherwise arg is the value of the single key column and a new
        dictionary is built. The dictionary is returned.

        The OID is also put into the dictionary if the table has one, but
        in order to allow the caller to work with multiple tables, it is
        munged as oid(schema.table).

        """
        if cl.endswith('*'):  # scan descendant tables?
            cl = cl[:-1].rstrip()  # need parent table name
        qcl = self._add_schema(cl)  # qualified class name
        # To allow users to work with multiple tables, the "oid" key
        # is munged with the class name.
        qoid = _oid_key(qcl)
        if not keyname:
            # use the primary key by default
            try:
                keyname = self.pkey(qcl)
            except KeyError:
                raise _prg_error('Class %s has no primary key' % qcl)
        if keyname == 'oid':
            # Look the row up by its oid, which lives under the munged key.
            if not isinstance(arg, dict):
                arg = {qoid: arg}
            elif qoid not in arg:
                raise _db_error('%s not in arg' % qoid)
            columns = '*'
            where = 'oid = %s' % arg[qoid]
        else:
            attnames = self.get_attnames(qcl)
            if isinstance(keyname, basestring):
                keyname = (keyname, )
            if not isinstance(arg, dict):
                if len(keyname) > 1:
                    raise _prg_error('Composite key needs dict as arg')
                arg = dict.fromkeys(keyname, arg)
            conditions = []
            for k in keyname:
                conditions.append(
                    '%s = %s' % (k, self._quote(arg[k], attnames[k])))
            where = ' AND '.join(conditions)
            columns = ', '.join(attnames)
        q = 'SELECT %s FROM %s WHERE %s LIMIT 1' % (columns, qcl, where)
        self._do_debug(q)
        rows = self.db.query(q).dictresult()
        if not rows:
            raise _db_error('No such record in %s where %s' % (qcl, where))
        # Copy the row into arg, storing the oid under its munged key.
        for att, value in rows[0].iteritems():
            if att == 'oid' and qoid:
                att = qoid
            arg[att] = value
        return arg

    def insert(self, cl, d=None, **kw):
        """Insert a row into a database table.

        The row values are taken from the dictionary d (a new dictionary
        is used if none is supplied) after it has been updated from the
        keyword arguments. Only entries named like a table attribute are
        inserted.

        The dictionary is then, if possible, reloaded with the values
        actually inserted in order to pick up values modified by rules,
        triggers, etc., and finally returned.

        Note: The method currently doesn't support insert into views
        although PostgreSQL does.

        """
        qcl = self._add_schema(cl)
        qoid = _oid_key(qcl)
        if d is None:
            d = {}
        d.update(kw)
        attnames = self.get_attnames(qcl)
        columns, literals = [], []
        for n in attnames:
            if n == 'oid' or n not in d:
                continue
            columns.append('"%s"' % n)
            literals.append(self._quote(d[n], attnames[n]))
        names = ', '.join(columns)
        values = ', '.join(literals)
        selectable = self.has_table_privilege(qcl)
        if selectable and self.server_version >= 80200:
            # Let the server hand back the inserted row directly.
            oid_column = 'oid, ' if 'oid' in attnames else ''
            ret = ' RETURNING %s*' % oid_column
        else:
            ret = ''
        q = 'INSERT INTO %s (%s) VALUES (%s)%s' % (qcl, names, values, ret)
        self._do_debug(q)
        res = self.db.query(q)
        if ret:
            row = res.dictresult()[0]
            for att, value in row.iteritems():
                if att == 'oid' and qoid:
                    att = qoid
                d[att] = value
        elif isinstance(res, int):
            # The query returned the oid of the inserted row.
            d[qoid] = res
            if selectable:
                self.get(qcl, d, 'oid')
        elif selectable:
            if qoid in d:
                self.get(qcl, d, 'oid')
            else:
                try:
                    self.get(qcl, d)
                except ProgrammingError:
                    pass  # table has no primary key
        return d

    def update(self, cl, d=None, **kw):
        """Update an existing row in a database table.

        Similar to insert but updates an existing row.  The update is based
        on the OID value as munged by get or passed as keyword, or on the
        primary key of the table.  The dictionary is modified, if possible,
        to reflect any changes caused by the update due to triggers, rules,
        default values, etc., and returned.  Nothing is sent to the
        database if the dictionary holds no values to be set.

        Raises a programming error if the row can be identified neither
        by an OID nor by the primary key.

        """
        # Update always works on the oid which get returns if available,
        # otherwise use the primary key.  Fail if neither.
        # Note that we only accept oid key from named args for safety
        qcl = self._add_schema(cl)
        qoid = _oid_key(qcl)
        if 'oid' in kw:
            kw[qoid] = kw['oid']
            del kw['oid']
        if d is None:
            d = {}
        d.update(kw)
        attnames = self.get_attnames(qcl)
        if qoid in d:
            where = 'oid = %s' % d[qoid]
            keyname = ()
        else:
            try:
                keyname = self.pkey(qcl)
            except KeyError:
                raise _prg_error('Class %s has no primary key' % qcl)
            if isinstance(keyname, basestring):
                keyname = (keyname, )
            try:
                where = ' AND '.join([
                    '%s = %s' % (k, self._quote(d[k], attnames[k]))
                    for k in keyname
                ])
            except KeyError:
                raise _prg_error('Update needs primary key or oid.')
        # Key columns identify the row and are therefore never assigned.
        values = []
        for n in attnames:
            if n in d and n not in keyname:
                values.append('%s = %s' % (n, self._quote(d[n], attnames[n])))
        if not values:
            return d
        values = ', '.join(values)
        selectable = self.has_table_privilege(qcl)
        # RETURNING is available with PostgreSQL >= 8.2, i.e. from
        # server_version 80200 on (the same threshold insert() uses).
        if selectable and self.server_version >= 80200:
            ret = ' RETURNING %s*' % ('oid' in attnames and 'oid, ' or '')
        else:
            ret = ''
        q = 'UPDATE %s SET %s WHERE %s%s' % (qcl, values, where, ret)
        self._do_debug(q)
        res = self.db.query(q)
        if ret:
            res = res.dictresult()[0]
            for att, value in res.iteritems():
                d[att == 'oid' and qoid or att] = value
        else:
            # Without RETURNING the row has to be read back separately.
            if selectable:
                if qoid in d:
                    self.get(qcl, d, 'oid')
                else:
                    self.get(qcl, d)
        return d

    def clear(self, cl, a=None):
        """Clear all the attributes to values determined by the types.

        Numeric types are set to 0, Booleans are set to 'f', and everything
        else is set to the empty string.  If the array argument is present,
        it is used as the array and any entries matching attribute names are
        cleared with everything else left unchanged.

        """
        # At some point we will need a way to get defaults from a table.
        qcl = self._add_schema(cl)
        if a is None:
            a = {}  # empty if argument is not present
        attnames = self.get_attnames(qcl)
        for n, t in attnames.iteritems():
            if n == 'oid':
                continue
            if t in ('int', 'integer', 'smallint', 'bigint', 'float', 'real',
                     'double precision', 'num', 'numeric', 'money'):
                a[n] = 0
            elif t in ('bool', 'boolean'):
                a[n] = 'f'
            else:
                a[n] = ''
        return a

    def delete(self, cl, d=None, **kw):
        """Delete an existing row in a database table.

        The row is identified by the OID value as munged by get or passed
        as keyword, or otherwise by the primary key of the table, with
        the key values taken from d updated by the keywords.  The return
        value is the number of deleted rows (i.e. 0 if the row did not
        exist and 1 if the row was deleted).

        """
        # Like update, delete works on the oid.
        # One day we will be testing that the record to be deleted
        # isn't referenced somewhere (or else PostgreSQL will).
        # Note that we only accept oid key from named args for safety
        qcl = self._add_schema(cl)
        qoid = _oid_key(qcl)
        if 'oid' in kw:
            # move the plain oid keyword to its munged key
            kw[qoid] = kw['oid']
            del kw['oid']
        if d is None:
            d = {}
        d.update(kw)
        if qoid in d:
            where = 'oid = %s' % d[qoid]
        else:
            try:
                keyname = self.pkey(qcl)
            except KeyError:
                raise _prg_error('Class %s has no primary key' % qcl)
            if isinstance(keyname, basestring):
                keyname = (keyname, )
            attnames = self.get_attnames(qcl)
            conditions = []
            try:
                for k in keyname:
                    conditions.append(
                        '%s = %s' % (k, self._quote(d[k], attnames[k])))
            except KeyError:
                raise _prg_error('Delete needs primary key or oid.')
            where = ' AND '.join(conditions)
        q = 'DELETE FROM %s WHERE %s' % (qcl, where)
        self._do_debug(q)
        return int(self.db.query(q))

    def notification_handler(self, event, callback, arg_dict=None,
                             timeout=None):
        """Get notification handler that will run the given callback.

        The event, callback, arg_dict and timeout are handed on to the
        NotificationHandler together with the underlying connection.
        If no arg_dict is passed, every handler gets a new empty
        dictionary of its own.

        """
        # A literal {} default would be one dictionary object shared by
        # all calls that omit arg_dict, so create a fresh one per call.
        if arg_dict is None:
            arg_dict = {}
        return NotificationHandler(self.db, event, callback, arg_dict, timeout)
Beispiel #54
0
class HTMLParser(html.HTMLParser, object):
    """Parser for HTML input based on the Python `HTMLParser` module.
    
    This class provides the same interface for generating stream events as
    `XMLParser`, and attempts to automatically balance tags.
    
    The parsing is initiated by iterating over the parser object:
    
    >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
    >>> for kind, data, pos in parser:
    ...     print kind, data
    START (QName(u'ul'), Attrs([(QName(u'compact'), u'compact')]))
    START (QName(u'li'), Attrs())
    TEXT Foo
    END li
    END ul
    """

    _EMPTY_ELEMS = frozenset([
        'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
        'isindex', 'link', 'meta', 'param'
    ])

    def __init__(self, source, filename=None, encoding='utf-8'):
        """Initialize the parser for the given HTML input.
        
        :param source: the HTML text as a file-like object
        :param filename: the name of the file, if known
        :param filename: encoding of the file; ignored if the input is unicode
        """
        html.HTMLParser.__init__(self)
        self.source = source
        self.filename = filename
        self.encoding = encoding
        self._queue = []
        self._open_tags = []

    def parse(self):
        """Generator that parses the HTML source, yielding markup events.
        
        :return: a markup event stream
        :raises ParseError: if the HTML text is not well formed
        """
        def _generate():
            try:
                bufsize = 4 * 1024  # 4K
                done = False
                while 1:
                    while not done and len(self._queue) == 0:
                        data = self.source.read(bufsize)
                        if data == '':  # end of data
                            self.close()
                            done = True
                        else:
                            self.feed(data)
                    for kind, data, pos in self._queue:
                        yield kind, data, pos
                    self._queue = []
                    if done:
                        open_tags = self._open_tags
                        open_tags.reverse()
                        for tag in open_tags:
                            yield END, QName(tag), pos
                        break
            except html.HTMLParseError, e:
                msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
                raise ParseError(msg, self.filename, e.lineno, e.offset)

        return Stream(_generate()).filter(_coalesce)
    def generate(self, _ignore_req_list=False, _append_slash=False, **kargs):
        """Generate a URL from ourself given a set of keyword arguments
        
        Returns False if the keyword arguments fail a regexp requirement
        or a method condition, or if no URL can be built from them.
        Keyword arguments that are not among the route's ``maxkeys`` are
        appended as a query string, except for 'action' and 'controller'.
        
        """
        # Verify that our args pass any regexp requirements
        if not _ignore_req_list:
            for key in self.reqs:
                val = kargs.get(key)
                if not val:
                    continue
                if not self.req_regs[key].match(self.make_unicode(val)):
                    return False

        # Verify that if we have a method arg, its in the method accept list.
        # Also, the method arg is removed before the route is generated.
        meth = as_unicode(kargs.get('method'), self.encoding)
        if meth:
            conditions = self.conditions
            if conditions and 'method' in conditions \
                    and meth.upper() not in conditions['method']:
                return False
            kargs.pop('method')

        if self.minimization:
            url = self.generate_minimized(kargs)
        else:
            url = self.generate_non_minimized(kargs)
        if url is False:
            return False

        if not (url.startswith('/') or self.static):
            url = '/' + url
        extras = frozenset(kargs) - self.maxkeys
        # The trailing slash goes in front of any query string.
        if _append_slash and not url.endswith('/'):
            url += '/'
        if extras:
            fragments = []
            # don't assume the 'extras' set preserves order: iterate
            # through the ordered kargs instead
            for key in kargs:
                if key not in extras or key in ('action', 'controller'):
                    continue
                val = kargs[key]
                # Sequences contribute one fragment per item.
                if isinstance(val, (tuple, list)):
                    items = val
                else:
                    items = [val]
                for item in items:
                    item = as_unicode(item, self.encoding)
                    fragments.append((key, _str_encode(item, self.encoding)))
            if fragments:
                url += '?' + urllib.urlencode(fragments)
        return url
Beispiel #56
0
 def validateAttributeValueDraggable(self, token, tagName, attrName,
                                     attrValue):
     # The value is checked against the enumerated set true/false; whatever
     # the enumerated-value check reports (possibly nothing) is relayed.
     allowed = frozenset(['true', 'false'])
     problems = self.checkEnumeratedValue(token, tagName, attrName,
                                          attrValue, allowed)
     if problems:
         for problem in problems:
             yield problem
Beispiel #57
0
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    >>> from genshi import HTML
    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
    >>> print html | HTMLSanitizer()
    <div/>

    The default set of safe tags and attributes can be modified when the filter
    is instantiated. For example, to allow inline ``style`` attributes, the
    following instantiation would work:

    >>> html = HTML('<div style="background: #000"></div>')
    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    >>> print html | sanitizer
    <div style="background: #000"/>

    Note that even in this case, the filter *does* attempt to remove dangerous
    constructs from style attributes:

    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
    >>> print html | sanitizer
    <div style="color: #000"/>

    This handles HTML entities, unicode escapes in CSS and Javascript text, as
    well as a lot of other things. However, the style tag is still excluded by
    default because it is very hard for such sanitizing to be completely safe,
    especially considering how much error recovery current web browsers perform.

    :warn: Note that this special processing of CSS is currently only applied to
           style attributes, **not** style elements.
    """

    SAFE_TAGS = frozenset([
        'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote',
        'br', 'button', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
        'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font',
        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
        'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
        'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span',
        'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea',
        'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var'
    ])

    SAFE_ATTRS = frozenset([
        'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
        'alt', 'axis', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
        'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear',
        'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir',
        'disabled', 'enctype', 'for', 'frame', 'headers', 'height', 'href',
        'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', 'longdesc',
        'maxlength', 'media', 'method', 'multiple', 'name', 'nohref',
        'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows',
        'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span',
        'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
        'usemap', 'valign', 'value', 'vspace', 'width'
    ])

    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    URI_ATTRS = frozenset(
        ['action', 'background', 'dynsrc', 'href', 'lowsrc', 'src'])

    def __init__(self,
                 safe_tags=SAFE_TAGS,
                 safe_attrs=SAFE_ATTRS,
                 safe_schemes=SAFE_SCHEMES,
                 uri_attrs=URI_ATTRS):
        """Create the sanitizer.

        The exact set of allowed elements and attributes can be configured.

        :param safe_tags: a set of tag names that are considered safe
        :param safe_attrs: a set of attribute names that are considered safe
        :param safe_schemes: a set of URI schemes that are considered safe
        :param uri_attrs: a set of names of attributes that contain URIs
        """
        self.safe_tags = safe_tags
        "The set of tag names that are considered safe."
        self.safe_attrs = safe_attrs
        "The set of attribute names that are considered safe."
        self.uri_attrs = uri_attrs
        "The set of names of attributes that may contain URIs."
        self.safe_schemes = safe_schemes
        "The set of URI schemes that are considered safe."

    def __call__(self, stream):
        """Apply the filter to the given stream.

        Elements whose tag is not in `safe_tags` are dropped together with
        everything they contain.  On the remaining elements, attributes that
        are not in `safe_attrs`, URI attributes with an unsafe scheme and
        unsafe ``style`` declarations are removed.  Comments are always
        dropped.

        :param stream: the markup event stream to filter
        :return: a generator over the filtered events
        """
        # Name of the unsafe element currently being dropped, and the number
        # of elements with that name that are open inside the dropped region.
        skip_tag = None
        skip_depth = 0

        for kind, data, pos in stream:
            if kind is START:
                tag, attrs = data
                if skip_depth:
                    # Count nested elements of the same name; otherwise the
                    # first matching END would end the dropped region too
                    # early and let the remaining content (plus a stray END
                    # event) through.
                    if tag == skip_tag:
                        skip_depth += 1
                    continue
                if tag not in self.safe_tags:
                    skip_tag = tag
                    skip_depth = 1
                    continue

                new_attrs = []
                for attr, value in attrs:
                    if attr not in self.safe_attrs:
                        continue
                    # Only decode entities for attributes that are kept
                    value = stripentities(value)
                    if attr in self.uri_attrs:
                        # Don't allow URI schemes such as "javascript:"
                        if not self.is_safe_uri(value):
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self.sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrs.append((attr, value))

                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is END:
                if skip_depth:
                    if data == skip_tag:
                        skip_depth -= 1
                else:
                    yield kind, data, pos

            elif kind is not COMMENT:
                if not skip_depth:
                    yield kind, data, pos

    def is_safe_uri(self, uri):
        """Determine whether the given URI is to be considered safe for
        inclusion in the output.

        The default implementation checks whether the scheme of the URI is in
        the set of allowed URIs (`safe_schemes`).

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.is_safe_uri('http://example.org/')
        True
        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
        False

        :param uri: the URI to check
        :return: `True` if the URI can be considered safe, `False` otherwise
        :rtype: `bool`
        """
        if ':' not in uri:
            return True  # This is a relative URI
        # Ignore anything that is not alphanumeric so that schemes obfuscated
        # with whitespace or control characters are still recognized
        chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower() in self.safe_schemes

    def sanitize_css(self, text):
        """Remove potentially dangerous property declarations from CSS code.

        In particular, properties using the CSS ``url()`` function with a scheme
        that is not considered safe are removed:

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.sanitize_css(u'''
        ...   background: url(javascript:alert("foo"));
        ...   color: #000;
        ... ''')
        [u'color: #000']

        Also, the proprietary Internet Explorer function ``expression()`` is
        always stripped:

        >>> sanitizer.sanitize_css(u'''
        ...   background: #fff;
        ...   color: #000;
        ...   width: e/**/xpression(alert("foo"));
        ... ''')
        [u'background: #fff', u'color: #000']

        Both checks ignore letter case, and comments are removed even when
        they span several lines, so that neither can be used to hide
        ``expression`` or ``url`` from the filter.

        :param text: the CSS text; this is expected to be `unicode` and to not
                     contain any character or numeric references
        :return: a list of declarations that are considered safe
        :rtype: `list`
        """
        decls = []
        text = self._strip_css_comments(self._replace_unicode_escapes(text))
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            # CSS function names are case-insensitive
            is_evil = 'expression' in decl.lower()
            if not is_evil:
                for match in self._URL_FINDITER(decl):
                    if not self.is_safe_uri(match.group(1)):
                        is_evil = True
                        break
            if not is_evil:
                decls.append(decl)
        return decls

    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
    _URL_FINDITER = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE).finditer

    def _replace_unicode_escapes(self, text):
        """Replace CSS hexadecimal escapes (``\\XXXXXX``) by the characters
        they denote, after normalizing ``\\r\\n`` line endings to ``\\n``.
        """
        def _repl(match):
            try:
                return unichr(int(match.group(1), 16))
            except ValueError:
                # Up to six hex digits can name a code point the interpreter
                # cannot represent; substitute U+FFFD instead of failing on
                # untrusted input.
                return u'\ufffd'

        return self._UNICODE_ESCAPE(_repl,
                                    self._NORMALIZE_NEWLINES('\n', text))

    # DOTALL: CSS comments may contain newlines
    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub

    def _strip_css_comments(self, text):
        """Return `text` with all ``/* ... */`` comments removed."""
        return self._CSS_COMMENTS('', text)
Beispiel #58
0
 def __hash__(self):
     """Return a hash of the item pairs, computed once and then cached.

     The value is stored on the instance as ``_hash``; later calls return
     the stored value without looking at the items again.
     """
     try:
         cached = self._hash
     except AttributeError:
         cached = None
     if cached is not None:
         return cached
     cached = hash(frozenset(self.iteritems()))
     self._hash = cached
     return cached
Beispiel #59
0
 def optimize(self):
     """Merge states that no transition can tell apart, in place.

     The states are first split into coarse groups (final, non-final, and
     one singleton group per unmergeable state).  The groups are then
     refined until a full pass splits nothing, or until there are as many
     groups as states.  Each remaining group becomes one state.

     Returns False, leaving the object untouched, when every group ends up
     holding a single state.  Otherwise replaces ``names``,
     ``transitions``, ``num_states``, ``final_states`` and
     ``unmergeable_states`` with the merged versions and returns True.
     """
     all_chars = self.get_all_chars()
     # find mergeable
     # Initial partition: mergeable non-final states, mergeable final states
     non_final = frozenset(
         set(range(self.num_states)) - self.final_states -
         self.unmergeable_states)
     final = frozenset(self.final_states - self.unmergeable_states)
     # state_to_set maps every state to the group that currently contains it
     state_to_set = {}
     equivalence_sets = set()
     if non_final:
         equivalence_sets.add(non_final)
     if final:
         equivalence_sets.add(final)
     for state in range(self.num_states):
         if state in final:
             state_to_set[state] = final
         elif state in self.unmergeable_states:
             # unmergeable states always stay alone in their own group
             singleset = frozenset([state])
             state_to_set[state] = singleset
             equivalence_sets.add(singleset)
         else:
             state_to_set[state] = non_final
     assert len(equivalence_sets) <= self.num_states
     # Refine the partition; one iteration of the while loop is one pass
     # over all current groups.
     while len(equivalence_sets) < self.num_states:
         new_equivalence_sets = set()
         changed = False
         for equivalent in equivalence_sets:
             for char in all_chars:
                 # Bucket the states of this group by the group their
                 # transition on `char` leads to (None: no such transition).
                 targets = {}
                 for state in equivalent:
                     if (state, char) in self:
                         nextstate = self[state, char]
                         target = frozenset(state_to_set[nextstate])
                     else:
                         nextstate = None
                         target = None
                     targets.setdefault(target, set()).add(state)
                 if len(targets) != 1:
                     # The states disagree on `char`: split the group into
                     # its buckets and stop looking at further characters.
                     for target, newequivalent in targets.iteritems():
                         newequivalent = frozenset(newequivalent)
                         new_equivalence_sets.add(newequivalent)
                         for state in newequivalent:
                             state_to_set[state] = newequivalent
                     changed = True
                     break
             else:
                 # no character split this group, so it is kept unchanged
                 new_equivalence_sets.add(equivalent)
         if not changed:
             break
         equivalence_sets = new_equivalence_sets
     if len(equivalence_sets) == self.num_states:
         # every group is a single state: nothing can be merged
         return False
     # merging the states
     newnames = []
     newtransitions = {}
     newnum_states = len(equivalence_sets)
     newstates = list(equivalence_sets)
     newstate_to_index = {}
     newfinal_states = set()
     newunmergeable_states = set()
     for i, newstate in enumerate(newstates):
         newstate_to_index[newstate] = i
     # bring startstate into first slot
     # (state 0 is used as the start state; its group is swapped to index 0
     # and the two affected index entries are corrected)
     startstateindex = newstate_to_index[state_to_set[0]]
     newstates[0], newstates[startstateindex] = newstates[
         startstateindex], newstates[0]
     newstate_to_index[newstates[0]] = 0
     newstate_to_index[newstates[startstateindex]] = startstateindex
     for i, newstate in enumerate(newstates):
         # default name: the names of all member states joined together
         name = ", ".join([self.names[s] for s in newstate])
         for state in newstate:
             if state in self.unmergeable_states:
                 newunmergeable_states.add(i)
                 # a group holding an unmergeable state takes that state's name
                 name = self.names[state]
             if state in self.final_states:
                 newfinal_states.add(i)
         newnames.append(name)
     # Re-express every old transition in terms of the new state indices
     for (state, char), nextstate in self.transitions.iteritems():
         newstate = newstate_to_index[state_to_set[state]]
         newnextstate = newstate_to_index[state_to_set[nextstate]]
         newtransitions[newstate, char] = newnextstate
     self.names = newnames
     self.transitions = newtransitions
     self.num_states = newnum_states
     self.final_states = newfinal_states
     self.unmergeable_states = newunmergeable_states
     return True
    "invalid-uri":
        _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."),
    "invalid-http-or-ftp-uri":
        _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."),
    "invalid-scheme":
        _(u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>."),
    "invalid-rel":
        _(u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>."),
    "invalid-mime-type":
        _(u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>."),
})

# Attribute names collected under the "global" heading; presumably these are
# accepted on any element, in addition to the per-element sets.
globalAttributes = frozenset([
    # general attributes
    'class', 'contenteditable', 'contextmenu', 'dir', 'draggable', 'id',
    'irrelevant', 'lang', 'ref', 'tabindex', 'template', 'title',
    # event handler attributes
    'onabort', 'onbeforeunload', 'onblur', 'onchange', 'onclick',
    'oncontextmenu', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter',
    'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror',
    'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmessage',
    'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
    'onmousewheel', 'onresize', 'onscroll', 'onselect', 'onsubmit',
    'onunload',
])
# XXX lang in HTML only, xml:lang in XHTML only
# XXX validate ref, template

allowedAttributeMap = {
    'html': frozenset(('xmlns',)),
    'head': frozenset(()),
    'title': frozenset(()),
    'base': frozenset(('href', 'target')),
    'link': frozenset(('href', 'rel', 'media', 'hreflang', 'type')),
    'meta': frozenset(('name', 'http-equiv', 'content', 'charset')), # XXX charset in HTML only
    'style': frozenset(('media', 'type', 'scoped')),
    'body': frozenset(()),