Example #1
0
 def evaluate_as_nodeset(self, context):
     """
     Look up nodes in a named key table.

     The first argument is evaluated as a string and must be a QName; it is
     expanded via `context.expand_qname` and used to index `context.keys`.
     The second argument supplies the lookup value(s): for a node-set, the
     string-value of every member is looked up and the hits are combined;
     any other value is converted to a string and looked up once.

     Returns a `datatypes.nodeset` (empty when the key name is unknown).
     Raises `XsltRuntimeError` if the first argument is not a QName.
     """
     name_expr, value_expr = self._args
     # Resolve the key name to its expanded form
     qname = name_expr.evaluate_as_string(context)
     if not isqname(qname):
         raise XsltRuntimeError(XsltError.INVALID_QNAME_ARGUMENT,
                                context.instruction, value=qname)
     expanded_name = context.expand_qname(qname)
     try:
         documents = context.keys[expanded_name]
     except KeyError:
         # Unknown key name
         return datatypes.nodeset()
     # The key table is kept per document root
     table = documents[context.node.xml_root]

     # Resolve the lookup value(s)
     lookup = value_expr.evaluate(context)
     if not isinstance(lookup, datatypes.nodeset):
         text = datatypes.string(lookup)
         matched = table[text] if text in table else ()
         return datatypes.nodeset(matched)

     matched = []
     for member in lookup:
         text = datatypes.string(member)
         if text in table:
             matched.extend(table[text])
     return datatypes.nodeset(matched)
Example #2
0
def test_substring_function():
    """
    Check substring() against a table of (arguments, expected text) cases,
    including fractional, NaN and infinite positions/lengths.
    """
    cases = [
        ([string_literal('"3.14Hi"'), string_literal('"3.14"')],
         '14Hi'),
        ([string_literal('"3.14Hi"'), string_literal('"3.14"'),
          number_literal('1')],
         '1'),
        ([string_literal('"12345"'), number_literal('2'),
          number_literal('3')],
         '234'),
        ([string_literal('"12345"'), number_literal('2')],
         '2345'),
        ([string_literal('"12345"'), number_literal('1.5'),
          number_literal('2.6')],
         '234'),
        ([string_literal('"12345"'), number_literal('0'),
          number_literal('3')],
         '12'),
        ([string_literal('"12345"'), NOT_A_NUMBER, number_literal('3')],
         ''),
        ([string_literal('"12345"'), number_literal('1'), NOT_A_NUMBER],
         ''),
        ([string_literal('"12345"'), number_literal('-42'),
          POSITIVE_INFINITY],
         '12345'),
        ([string_literal('"12345"'), NEGATIVE_INFINITY, POSITIVE_INFINITY],
         ''),
    ]
    for args, text in cases:
        expected = datatypes.string(text)
        call = function_call('substring', args)
        result = call.evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #3
0
 def evaluate_as_string(self, context):
     """
     Return the argument's string value with whitespace normalized:
     leading/trailing whitespace removed and each internal run collapsed
     to a single space.  With no argument, the string-value of the
     context node is used.
     """
     (expr,) = self._args
     if expr is not None:
         text = expr.evaluate_as_string(context)
     else:
         text = datatypes.string(context.node)
     words = text.split()
     return datatypes.string(u' '.join(words))
Example #4
0
 def evaluate_as_nodeset(self, context):
     """
     Return the nodes registered in a key table for the given value(s).

     `self._args` holds two expressions: the key name (must evaluate to a
     QName, otherwise `XsltRuntimeError` is raised) and the value to look
     up.  An unknown key name yields an empty node-set.
     """
     key_expr, lookup_expr = self._args

     # Locate the key table for the current document
     name = key_expr.evaluate_as_string(context)
     if not isqname(name):
         raise XsltRuntimeError(XsltError.INVALID_QNAME_ARGUMENT,
                                context.instruction,
                                value=name)
     name = context.expand_qname(name)
     try:
         per_document = context.keys[name]
     except KeyError:
         # No key declared under this name
         return datatypes.nodeset()
     entries = per_document[context.node.xml_root]

     # Evaluate the lookup value and collect the matching nodes
     wanted = lookup_expr.evaluate(context)
     if isinstance(wanted, datatypes.nodeset):
         # Each member contributes the nodes keyed by its string-value
         hits = []
         for member in wanted:
             key = datatypes.string(member)
             if key in entries:
                 hits.extend(entries[key])
     else:
         key = datatypes.string(wanted)
         if key in entries:
             hits = entries[key]
         else:
             hits = ()
     return datatypes.nodeset(hits)
Example #5
0
 def evaluate_as_string(self, context):
     """
     Return the `xml_nodeid` of a node as an XPath string.

     With no argument the context node is used; otherwise the first node
     produced by evaluating the argument as a node-set.  An empty node-set
     gives the empty string.
     """
     (expr,) = self._args
     if expr is None:
         node = context.node
     else:
         for node in expr.evaluate_as_nodeset(context):
             # Only the first node matters
             break
         else:
             return datatypes.EMPTY_STRING
     return datatypes.string(node.xml_nodeid)
Example #6
0
 def evaluate_as_string(self, context):
     """
     Produce the node identifier (`xml_nodeid`) of the context node, or of
     the first node in the argument node-set, as an XPath string.  Returns
     the empty string when the argument node-set has no members.
     """
     (expr,) = self._args
     if expr is None:
         candidates = [context.node]
     else:
         candidates = expr.evaluate_as_nodeset(context)
     for candidate in candidates:
         # Return on the first node; later ones are ignored
         return datatypes.string(candidate.xml_nodeid)
     return datatypes.EMPTY_STRING
Example #7
0
def test_substring_before_function():
    """
    Check substring-before() against expected string results.

    substring-before() returns a string, and the expected values here are
    strings, so the call is evaluated as a string (matching the sibling
    substring-after test).  Evaluating it as a boolean would only check
    emptiness, so any non-empty result would satisfy the first case.
    """
    for args, expected in (
        ([string_literal('"3.14Hi"'), string_literal('"Hi"')], datatypes.string('3.14')),
        ([string_literal('"3.14Hi"'), string_literal('""')], datatypes.string())
        ):
        result = function_call('substring-before', args).evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #8
0
def test_namespace_uri_function():
    """
    namespace-uri() on an empty node-set gives the empty string; on CHILD3
    it gives 'http://foo.com'.
    """
    cases = [
        (nodeset_literal([]), datatypes.string()),
        (nodeset_literal([CHILD3]), datatypes.string('http://foo.com')),
    ]
    for arg, expected in cases:
        call = function_call('namespace-uri', [arg])
        result = call.evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #9
0
def test_local_name_function():
    """
    local-name() on an empty node-set gives the empty string; on CHILD3 it
    gives 'CHILD3'.
    """
    cases = [
        (nodeset_literal([]), datatypes.string()),
        (nodeset_literal([CHILD3]), datatypes.string('CHILD3')),
    ]
    for arg, expected in cases:
        call = function_call('local-name', [arg])
        result = call.evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #10
0
def test_substring_after_function():
    """
    substring-after() returns the text following the first occurrence of
    the second argument; the expected results are listed per case.
    """
    cases = [
        ([string_literal('"3.14Hi"'), string_literal('"3.14"')],
         datatypes.string('Hi')),
        ([string_literal('"3.14Hi"'), string_literal('""')],
         datatypes.string()),
    ]
    for args, expected in cases:
        call = function_call('substring-after', args)
        result = call.evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #11
0
def test_namespace_uri_function():
    """
    Evaluate namespace-uri() as a string for an empty node-set and for
    CHILD3, comparing against the expected XPath strings.
    """
    empty_case = (nodeset_literal([]), datatypes.string())
    child_case = (nodeset_literal([CHILD3]),
                  datatypes.string('http://foo.com'))
    for case in (empty_case, child_case):
        arg, expected = case
        result = function_call('namespace-uri',
                               [arg]).evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #12
0
def test_local_name_function():
    """
    Evaluate local-name() as a string for an empty node-set and for
    CHILD3, comparing against the expected XPath strings.
    """
    empty_case = (nodeset_literal([]), datatypes.string())
    child_case = (nodeset_literal([CHILD3]), datatypes.string('CHILD3'))
    for case in (empty_case, child_case):
        arg, expected = case
        result = function_call('local-name',
                               [arg]).evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #13
0
def replace_function(context, string, search, replace):
    """
    The str:replace function converts a string to a node-set, with
    each instance of a substring from a given list (obtained from the
    string-values of nodes in the second argument) replaced by the
    node at the corresponding position of the node-set given as the
    third argument. Unreplaced substrings become text nodes.

    The second and third arguments can be any type of object; if
    either is not a node-set, it is treated as if it were a node-set
    of just one text node, formed from the object's string-value.

    Attribute and namespace nodes in the replacement set are
    erroneous but are treated as empty text nodes.

    All occurrences of the longest substrings are replaced first,
    and once a replacement is made, that span of the original string
    is no longer eligible for future replacements.

    An empty search string matches between every character of the
    original string.

    `string`, `search` and `replace` are unevaluated expressions; they are
    evaluated against `context` here.  Returns a `datatypes.nodeset` made
    of the children of the generated result tree fragment.

    See http://exslt.org/str/functions/replace/str.replace.html for details.
    """
    #FIXME: http://www.exslt.org/str/functions/replace/ doesn't say we have
    #to convert the first arg to a string, but should we, anyway?
    #If not, we should at least check and flag non-strings with a clear error?
    string = string.evaluate_as_string(context)
    search = search.evaluate(context)
    replace = replace.evaluate(context)
    # prepare a list of strings to search for
    if isinstance(search, datatypes.nodeset):
        search = [datatypes.string(node) for node in search]
    else:
        search = [datatypes.string(search)]
    if isinstance(replace, datatypes.nodeset):
        # Substitute empty text nodes for attr and ns nodes.  A new list is
        # built instead of assigning into the evaluated node-set, so that
        # the object returned by evaluate() (which this function does not
        # own) is never modified.
        replace = [
            tree.text(u'')
            if isinstance(node, (tree.attribute, tree.namespace)) else node
            for node in replace
        ]
    else:
        replace = [tree.text(datatypes.string(replace))]
    # Unpaired search patterns are to be deleted (replacement is None)
    replace = itertools.chain(replace, itertools.repeat(None))
    # Sort the (search, replacement, length) tuples in descending order by
    # length of the search string, so the longest ones are replaced first.
    replacements = zip(search, replace, itertools.imap(len, search))
    replacements.sort(key=operator.itemgetter(2), reverse=True)

    # generate a result tree fragment
    context.push_tree_writer(context.instruction.baseUri)
    _replace(context, string, *replacements)
    writer = context.pop_writer()
    rtf = writer.get_result()
    return datatypes.nodeset(rtf.xml_children)
Example #14
0
def replace_function(context, string, search, replace):
    """
    The str:replace function converts a string to a node-set, with
    each instance of a substring from a given list (obtained from the
    string-values of nodes in the second argument) replaced by the
    node at the corresponding position of the node-set given as the
    third argument. Unreplaced substrings become text nodes.

    The second and third arguments can be any type of object; if
    either is not a node-set, it is treated as if it were a node-set
    of just one text node, formed from the object's string-value.

    Attribute and namespace nodes in the replacement set are
    erroneous but are treated as empty text nodes.

    All occurrences of the longest substrings are replaced first,
    and once a replacement is made, that span of the original string
    is no longer eligible for future replacements.

    An empty search string matches between every character of the
    original string.

    The three arguments after `context` are unevaluated expressions and
    are evaluated here.  The return value is a `datatypes.nodeset` holding
    the children of the result tree fragment that was written.

    See http://exslt.org/str/functions/replace/str.replace.html for details.
    """
    # FIXME: http://www.exslt.org/str/functions/replace/ doesn't say we have
    # to convert the first arg to a string, but should we, anyway?
    # If not, we should at least check and flag non-strings with a clear error?
    string = string.evaluate_as_string(context)
    search = search.evaluate(context)
    replace = replace.evaluate(context)
    # prepare a list of strings to search for
    if isinstance(search, datatypes.nodeset):
        search = map(datatypes.string, search)
    else:
        search = [datatypes.string(search)]
    if isinstance(replace, datatypes.nodeset):
        # Copy the replacement nodes, swapping attr and ns nodes for empty
        # text nodes.  Copying (rather than writing into the evaluated
        # node-set by index) leaves the object returned by evaluate()
        # untouched for any other holder of it.
        sanitized = []
        for node in replace:
            if isinstance(node, (tree.attribute, tree.namespace)):
                node = tree.text(u"")
            sanitized.append(node)
        replace = sanitized
    else:
        replace = [tree.text(datatypes.string(replace))]
    # Unpaired search patterns are to be deleted (replacement is None)
    replace = itertools.chain(replace, itertools.repeat(None))
    # Order the (search, replacement, length) tuples by descending length
    # of the search string so that the longest strings are replaced first.
    replacements = zip(search, replace, itertools.imap(len, search))
    replacements.sort(key=operator.itemgetter(2), reverse=True)

    # generate a result tree fragment
    context.push_tree_writer(context.instruction.baseUri)
    _replace(context, string, *replacements)
    writer = context.pop_writer()
    rtf = writer.get_result()
    return datatypes.nodeset(rtf.xml_children)
Example #15
0
def test_substring_after_function():
    """
    Evaluate substring-after() as a string for each argument list and
    compare with the expected XPath string.
    """
    found_case = (
        [string_literal('"3.14Hi"'), string_literal('"3.14"')],
        datatypes.string('Hi'),
    )
    empty_case = (
        [string_literal('"3.14Hi"'), string_literal('""')],
        datatypes.string(),
    )
    for case in (found_case, empty_case):
        args, expected = case
        result = function_call('substring-after',
                               args).evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #16
0
def test_substring_before_function():
    """
    Check substring-before() against expected string results.

    The expected values are XPath strings, so the call is evaluated as a
    string and the result type is asserted to be `datatypes.string`, in
    line with the substring-after test.  A boolean evaluation would only
    distinguish empty from non-empty results.
    """
    for args, expected in (([
            string_literal('"3.14Hi"'),
            string_literal('"Hi"')
    ], datatypes.string('3.14')),
                           ([string_literal('"3.14Hi"'),
                             string_literal('""')], datatypes.string())):
        result = function_call('substring-before',
                               args).evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #17
0
        def handle_element(elem, resource):
            """
            Generator: walk `elem` and its child elements, yielding
            (resource, relationship, value) tuples driven by the expressions
            stored on each element's `xml_model`.

            `resource` is the identifier inherited from the caller.  If this
            element designates a new resource, that new identifier is used
            for the `other_rel_exprs` tuples and is passed down to child
            elements; the tuple produced from `metadata_rel_expr` still
            carries the inherited `resource`.
            """
            new_resource = None
            prefixes = elem.xml_root.xml_model.prefixes
            # A context expression that evaluates false prunes this element
            # and everything beneath it.
            if elem.xml_model.metadata_context_expr:
                if not elem.xml_model.metadata_context_expr.evaluate(
                        context(elem, namespaces=prefixes)):
                    return
            # Is there a cue that designates this element as a resource envelope?
            if elem.xml_model.metadata_resource_expr:
                if elem.xml_model.metadata_resource_expr == NODE_ID_MARKER:
                    # FIXME: Isn't going from unicode -> xpath str -> unicode wasteful?
                    new_resource = unicode(datatypes.string(elem.xml_nodeid))
                else:
                    new_resource = unicode(
                        datatypes.string(
                            elem.xml_model.metadata_resource_expr.evaluate(
                                context(elem, namespaces=prefixes))))
            # Is there a cue that designates a relationship in this element?
            if elem.xml_model.metadata_rel_expr:
                # Execute the XPath to get the relationship name/title
                rel = datatypes.string(
                    elem.xml_model.metadata_rel_expr.evaluate(
                        context(elem, namespaces=prefixes)))
                if elem.xml_model.metadata_value_expr:
                    # Execute the XPath to get the relationship value
                    val = elem.xml_model.metadata_value_expr.evaluate(
                        context(elem, namespaces=prefixes))
                elif new_resource is not None:
                    # If the element is also a resource envelope, the default value is the new resource ID
                    val = new_resource
                else:
                    # Handle the default ak:value of "."
                    val = datatypes.nodeset([elem])
                # Note: `resource` here is still the inherited one; it is
                # only rebound to new_resource after this yield.
                yield (unicode(resource), unicode(rel), val)
                # Disabled code, kept for reference:
                #Basically expandqname first
                #prefix, local = splitqname(rattr)
                #try:
                #    ns = elem.xml_namespaces[prefix]
                #    resource = ns + local
                #except KeyError:
                #    resource = rattr
            # From here on, tuples are attributed to the new resource (if any)
            if new_resource is not None: resource = new_resource

            # Additional (relationship, value) expression pairs on the model
            for rel_expr, val_expr in elem.xml_model.other_rel_exprs:
                rel = datatypes.string(
                    elem.xml_select(rel_expr, prefixes=prefixes))
                val = elem.xml_select(val_expr, prefixes=prefixes)
                yield (unicode(resource), unicode(rel), val)

            # Recurse into child elements, re-yielding their tuples
            for child in elem.xml_elements:
                for item in handle_element(child, resource):
                    yield item
            return
Example #18
0
    def _match_nodes(self, context, nodes):
        """
        Generator: for every node in `nodes` (and, recursively, the children
        and optionally attributes of elements and the children of entities),
        yield `(datatypes.string(value), node)` pairs for each entry in
        `self._match_table` whose pattern matches the node.

        `context` is mutated while iterating: its node, current_node,
        position, size and namespaces are overwritten.  An `XPathError`
        raised by pattern matching is re-raised as `XsltError` with the
        same code.
        """
        # NOTE(review): initial_focus is captured but never read in this
        # method; the context focus is not restored on exit -- confirm
        # whether a restore was intended.
        initial_focus = context.node, context.position, context.size
        context.size = len(nodes)
        position = 1
        for node in nodes:
            context.node = context.current_node = node
            context.position = position
            position += 1
            # Get the possible matches for `node`
            type_key = node.xml_typecode
            type_table = self._match_table
            if type_key in type_table:
                if type_key == tree.element.xml_typecode:
                    # Elements are further indexed by name, with the None
                    # entry used when the name has no dedicated entry.
                    element_table = type_table[type_key]
                    name_key = node.xml_name
                    if name_key in element_table:
                        matches = element_table[name_key]
                    else:
                        matches = element_table[None]
                else:
                    matches = type_table[type_key]
            else:
                # No entry for this node type: use the generic node entry
                matches = type_table[tree.node.xml_typecode]

            for pattern, axis_type, namespaces, use_expr in matches:
                context.namespaces = namespaces
                try:
                    m = pattern.match(context, node, axis_type)
                except XPathError, exc:
                    raise XsltError(exc.code)

                if m:
                    # Evaluate the use expression with `node` as the sole
                    # context item, then put the previous focus back.
                    focus = context.node, context.position, context.size
                    context.node, context.position, context.size = node, 1, 1
                    value = use_expr.evaluate(context)
                    if isinstance(value, datatypes.nodeset):
                        # One pair per member of the node-set
                        for value in value:
                            yield datatypes.string(value), node
                    else:
                        yield datatypes.string(value), node
                    context.node, context.position, context.size = focus

            # Descend into the subtree
            if isinstance(node, tree.element):
                for item in self._match_nodes(context, node.xml_children):
                    yield item
                # Attributes are only visited when self._matches_attribute
                # is set
                if self._matches_attribute and node.xml_attributes:
                    attributes = tuple(node.xml_attributes.nodes())
                    for item in self._match_nodes(context, attributes):
                        yield item
            elif isinstance(node, tree.entity):
                for item in self._match_nodes(context, node.xml_children):
                    yield item
Example #19
0
    def _match_nodes(self, context, nodes):
        """
        Walk `nodes` and their descendants, yielding a
        `(datatypes.string(value), node)` pair for each match-table entry
        whose pattern matches a node; `value` comes from evaluating the
        entry's use expression with that node as context.

        This is a generator that overwrites `context.node`,
        `context.current_node`, `context.position`, `context.size` and
        `context.namespaces` as it goes.  `XPathError` from pattern
        matching is converted to `XsltError` carrying the same code.
        """
        # NOTE(review): initial_focus is assigned but not used anywhere in
        # this method, so the caller's focus is not restored here -- verify
        # whether that is intentional.
        initial_focus = context.node, context.position, context.size
        context.size = len(nodes)
        position = 1
        for node in nodes:
            context.node = context.current_node = node
            context.position = position
            position += 1
            # Get the possible matches for `node`
            type_key = node.xml_typecode
            type_table = self._match_table
            if type_key in type_table:
                if type_key == tree.element.xml_typecode:
                    # Element entries are a nested table keyed by element
                    # name; None is the entry for names not listed.
                    element_table = type_table[type_key]
                    name_key = node.xml_name
                    if name_key in element_table:
                        matches = element_table[name_key]
                    else:
                        matches = element_table[None]
                else:
                    matches = type_table[type_key]
            else:
                # Fall back to the entry registered for the base node type
                matches = type_table[tree.node.xml_typecode]

            for pattern, axis_type, namespaces, use_expr in matches:
                context.namespaces = namespaces
                try:
                    m = pattern.match(context, node, axis_type)
                except XPathError, exc:
                    raise XsltError(exc.code)

                if m:
                    # Temporarily narrow the focus to just `node` while the
                    # use expression is evaluated and its values are yielded
                    focus = context.node, context.position, context.size
                    context.node, context.position, context.size = node, 1, 1
                    value = use_expr.evaluate(context)
                    if isinstance(value, datatypes.nodeset):
                        # A node-set contributes one pair per member
                        for value in value:
                            yield datatypes.string(value), node
                    else:
                        yield datatypes.string(value), node
                    context.node, context.position, context.size = focus

            # Recurse: element children (plus attributes when enabled) and
            # entity children
            if isinstance(node, tree.element):
                for item in self._match_nodes(context, node.xml_children):
                    yield item
                if self._matches_attribute and node.xml_attributes:
                    attributes = tuple(node.xml_attributes.nodes())
                    for item in self._match_nodes(context, attributes):
                        yield item
            elif isinstance(node, tree.entity):
                for item in self._match_nodes(context, node.xml_children):
                    yield item
Example #20
0
    def evaluate_as_nodeset(self, context):
        """
        Return the nodes whose IDs are named by the argument.

        A node-set argument contributes the string-value of each member as
        an ID; any other value is converted to a string and split on
        whitespace into IDs.  Each distinct ID is resolved with
        `xml_lookup` on the context node's root; lookups that return a
        false value are dropped and the remaining nodes are sorted.
        """
        (expr,) = self._args
        value = expr.evaluate(context)
        if isinstance(value, datatypes.nodeset):
            unique_ids = set(datatypes.string(member) for member in value)
        else:
            unique_ids = set(datatypes.string(value).split())

        root = context.node.xml_root
        found = []
        for id_value in unique_ids:
            node = root.xml_lookup(id_value)
            if node:
                found.append(node)
        found.sort()
        return datatypes.nodeset(found)
Example #21
0
    def evaluate_as_nodeset(self, context):
        """
        Resolve a set of IDs to nodes in the context node's document.

        IDs come from the string-values of a node-set argument, or from the
        whitespace-separated tokens of any other argument's string value.
        Returns a sorted `datatypes.nodeset` of the nodes `xml_lookup`
        found (false lookup results are skipped).
        """
        (expr,) = self._args
        evaluated = expr.evaluate(context)
        if not isinstance(evaluated, datatypes.nodeset):
            tokens = datatypes.string(evaluated).split()
            wanted = set(tokens)
        else:
            wanted = set(datatypes.string(member) for member in evaluated)

        document = context.node.xml_root
        lookups = (document.xml_lookup(token) for token in wanted)
        ordered = sorted(node for node in lookups if node)
        return datatypes.nodeset(ordered)
Example #22
0
def U(s, encoding='utf-8', noneok=False):
    """
    Apply a set of heuristics to the object to figure out how best
    to get text from it.

    XML is just text. Unfortunately there's a lot that gets in the way of the
    text in common usage: data types, XPath strings (very close, but not exactly
    the same thing as Python Unicode objects), Python string objects, character
    encodings, etc.  This function does its best to cut through all the complexity
    and get you back as conveniently as possible to what's important: the text

        import amara
        from amara.lib import U
        x = amara.parse('<a x="1">spam</a>')
        U(x.xml_select('a'))

    s - the object to convert
    encoding - codec used to decode byte strings (default 'utf-8')
    noneok - if true, None is converted to u''; otherwise passing None
             raises TypeError

    Returns a Unicode object.

    Note: you can make U always just convert None to u'' as follows:

    >>> from functools import partial
    >>> from amara.lib import U
    >>> U = partial(U, noneok=True)
    >>> U(None)
    u''
    """
    from amara import tree
    from amara.xpath import datatypes
    #xpath.datatypes.string is a subclass of Unicode object, so it won't fall through
    #the test below into the XPath section proper
    if isinstance(s, datatypes.string): return unicode(s)
    #If it's already a Unicode object, nothing to do
    if isinstance(s, unicode): return s
    #If it's a string, decode it to yield Unicode
    if isinstance(s, str): return s.decode(encoding)
    #If it's an Amara node, return its XPath-based string value
    if isinstance(s, tree.node): return unicode(datatypes.string(s))
    #If it's an XPath data type object, apply the equivalent of the XPath string() function
    if isinstance(s, datatypes.xpathobject):
        return unicode(datatypes.string(s))
    #Specialize datetime object treatment, because the default unicode coercion doesn't go to W3C ISO flavor
    #isoformat() returns a byte string, so coerce it to keep the return type
    #consistent with every other branch
    if isinstance(s, datetime.datetime): return unicode(s.isoformat())
    if s is None:
        #FIXME: L10N
        if noneok:
            return u''
        else:
            raise TypeError('Refusing to coerce None into Unicode')
    #Otherwise just leap into default coercions
    try:
        return unicode(s)
    except TypeError:
        return str(s).decode(encoding)
Example #23
0
def paramvalue(obj):
    """
    Convert a Python object into an XPath data model value.

    XPath objects and tree nodes are returned unchanged; text becomes a
    `datatypes.string` (byte strings are decoded as UTF-8), booleans map
    to `datatypes.TRUE`/`datatypes.FALSE`, and numbers become a
    `datatypes.number`.  A list or tuple is accepted only if it holds
    nothing but strings (converted to a node-set of text nodes) or nothing
    but nodes.

    Returns the converted value, or None when no conversion applies.
    """
    if isinstance(obj, datatypes.xpathobject):
        return obj
    if isinstance(obj, unicode):
        return datatypes.string(obj)
    if isinstance(obj, str):
        try:
            decoded = obj.decode("utf-8")
        except UnicodeError:
            return None
        return datatypes.string(decoded)
    # bool is a subclass of int, so it has to be checked before the numbers
    if isinstance(obj, bool):
        if obj:
            return datatypes.TRUE
        return datatypes.FALSE
    if isinstance(obj, (int, long, float)):
        return datatypes.number(obj)
    if isinstance(obj, tree.node):
        return obj
    # NOTE: At one time (WSGI.xml days) this attempted to be smart and
    # handle all iterables, but that would mean blindly dealing with
    # dangerous creatures, such as sockets. So now it's more conservative
    # and sticks to just list & tuple.
    if not isinstance(obj, (list, tuple)):
        return None

    # The sequence is usable only if its items are all strings or all nodes.
    if all(isinstance(item, (str, unicode)) for item in obj):
        # Strings become text nodes; an entity is used as their parent to
        # preserve ordering.
        holder = tree.entity()
        for item in obj:
            if isinstance(item, str):
                try:
                    item = unicode(item, "utf8")
                except UnicodeError:
                    return None
            holder.xml_append(tree.text(item))
        return datatypes.nodeset(holder.xml_children)

    for item in obj:
        if not isinstance(item, tree.node):
            return None
    return datatypes.nodeset(obj)
Example #24
0
def paramvalue(obj):
    """
    Try to turn a Python object into a value of the XPath data model.

    Handles XPath objects and nodes (passed through), unicode and UTF-8
    byte strings, booleans, numbers, and lists/tuples made up entirely of
    strings or entirely of nodes.

    Returns the XPath value on success, otherwise None.
    """
    if isinstance(obj, datatypes.xpathobject):
        return obj

    result = None
    if isinstance(obj, unicode):
        result = datatypes.string(obj)
    elif isinstance(obj, str):
        try:
            result = datatypes.string(obj.decode('utf-8'))
        except UnicodeError:
            result = None
    elif isinstance(obj, bool):
        # Must come before the numeric test: bool is a subclass of int
        result = datatypes.TRUE if obj else datatypes.FALSE
    elif isinstance(obj, (int, long, float)):
        result = datatypes.number(obj)
    elif isinstance(obj, tree.node):
        result = obj
    elif isinstance(obj, (list, tuple)):
        # NOTE: At one time (WSGI.xml days) this attempted to be smart and
        # handle all iterables, but this would mean blindly dealing with
        # dangerous creatures, such as sockets. So now it's more
        # conservative and sticks to just list & tuple.
        #
        # The list is only usable if the items are all strings or all nodes.
        only_strings = True
        for item in obj:
            if not isinstance(item, (str, unicode)):
                only_strings = False
                break
        if only_strings:
            # Strings are converted to a nodeset of text nodes, appended to
            # an entity so that ordering is preserved.
            entity = tree.entity()
            for item in obj:
                if isinstance(item, str):
                    try:
                        item = unicode(item, 'utf8')
                    except UnicodeError:
                        return None
                entity.xml_append(tree.text(item))
            result = datatypes.nodeset(entity.xml_children)
        else:
            only_nodes = True
            for item in obj:
                if not isinstance(item, tree.node):
                    only_nodes = False
                    break
            if only_nodes:
                result = datatypes.nodeset(obj)
    return result
Example #25
0
def U(s, encoding='utf-8', noneok=False):
    """
    Apply a set of heuristics to the object to figure out how best
    to get text from it.

    XML is just text. Unfortunately there's a lot that gets in the way of the
    text in common usage: data types, XPath strings (very close, but not exactly
    the same thing as Python Unicode objects), Python string objects, character
    encodings, etc.  This function does its best to cut through all the complexity
    and get you back as conveniently as possible to what's important: the text

        import amara
        from amara.lib import U
        x = amara.parse('<a x="1">spam</a>')
        U(x.xml_select('a'))

    Arguments:
        s - object to get text from
        encoding - codec for decoding byte strings (default 'utf-8')
        noneok - when true None becomes u''; when false None raises
                 TypeError

    The result is always a Unicode object.

    Note: you can make U always just convert None to u'' as follows:

    >>> from functools import partial
    >>> from amara.lib import U
    >>> U = partial(U, noneok=True)
    >>> U(None)
    u''
    """
    from amara import tree
    from amara.xpath import datatypes
    #xpath.datatypes.string is a subclass of Unicode object, so it won't fall through
    #the test below into the XPath section proper
    if isinstance(s, datatypes.string): return unicode(s)
    #If it's already a Unicode object, nothing to do
    if isinstance(s, unicode): return s
    #If it's a string, decode it to yield Unicode
    if isinstance(s, str): return s.decode(encoding)
    #If it's an Amara node, return its XPath-based string value
    if isinstance(s, tree.node): return unicode(datatypes.string(s))
    #If it's an XPath data type object, apply the equivalent of the XPath string() function
    if isinstance(s, datatypes.xpathobject): return unicode(datatypes.string(s))
    #Specialize datetime object treatment, because the default unicode coercion doesn't go to W3C ISO flavor
    if isinstance(s, datetime.datetime):
        #isoformat() gives a byte string; convert so that this branch
        #returns Unicode like all the others
        return unicode(s.isoformat())
    if s is None:
        #FIXME: L10N
        if noneok:
            return u''
        else:
            raise TypeError('Refusing to coerce None into Unicode')
    #Otherwise just leap into default coercions
    try:
        return unicode(s)
    except TypeError:
        return str(s).decode(encoding)
Example #26
0
 def __unicode__(self):
     """
     Return the XPath string-value of this node (the result of
     `datatypes.string(self)`) as a plain Unicode object.
     """
     string_value = datatypes.string(self)
     return unicode(string_value)
Example #27
0
def replace_function(context, source, pattern, flags, repl):
    """
    Implementation of regexp:replace: substitute the parts of a string that
    match a regular expression with a replacement string.

    Arguments, in order (each is evaluated as a string against `context`):
      source  - the string to search and rewrite
      pattern - the regular expression (EXSLT specifies Javascript regular
                expression syntax; here it is compiled with Python's `re`)
      flags   - a string of single-character flags:
                  g: global replace - every match is replaced; without it
                     only the first match is replaced
                  i: case insensitive matching; without it matching is
                     case sensitive
      repl    - the text that replaces each matched part

    Returns the rewritten text as a datatypes.string.
    """
    subject = source.evaluate_as_string(context)
    regex_text = pattern.evaluate_as_string(context)
    flag_chars = flags.evaluate_as_string(context)
    replacement = repl.evaluate_as_string(context)

    if 'i' in flag_chars:
        compile_flags = re.IGNORECASE
    else:
        compile_flags = 0

    # RE.sub() treats a count of zero as "replace all"; without the 'g'
    # flag only a single replacement is allowed.
    if 'g' in flag_chars:
        max_replacements = 0
    else:
        max_replacements = 1

    compiled = re.compile(regex_text, compile_flags)
    replaced = compiled.sub(replacement, subject, max_replacements)
    return datatypes.string(replaced)
Example #28
0
    def _format(self, number, token, letter_value, separator, grouping):
        """
        Render `number` as text according to the format `token`.

        token 'I' / 'i' (unless `letter_value` is 'alphabetic') selects
        roman numerals, 'A' / 'a' selects alphabetic numbering, and any
        other token selects zero-padded arabic digits whose minimum width is
        the length of the token.  Tokens that do not end in '1' are not
        supported and are treated as '1'.

        For arabic output, when both `separator` and `grouping` are set, the
        digits are split from the right into groups of `grouping` characters
        joined by `separator`.

        Returns the formatted text as a datatypes.string.
        """
        if token in ('I', 'i') and letter_value != 'alphabetic':
            # roman numerals
            if 0 < number < self._roman_max:
                result = []
                # NOTE(review): the strict `>` skips a bound that equals the
                # remaining number; confirm against the layout of
                # self._roman_digits before changing it.
                for bound, digits in self._roman_digits[token == 'i']:
                    if number > bound:
                        index, number = divmod(number, bound)
                        result.append(digits[index])
                result = u''.join(result)
            else:
                # out of range for roman numerals; fall back to plain digits
                result = '%d' % number
        elif token in ('A', 'a'):
            # alphabetic numbering
            alphabet = ASCII_LOWER if token == 'a' else ASCII_UPPER
            result = self._alpha_sequence(number, alphabet)
        else:
            # arabic numerals
            if token[-1:] != '1':
                # unsupported format token, using '1'
                token = '1'
            result = '%0*d' % (len(token), number)
            if separator and grouping:
                # Work with negative offsets so groups are counted from the
                # right-hand end of the formatted digits.
                start = -len(result)
                step = -grouping
                if start < step:
                    groups = []
                    for stop in reversed(xrange(step, start, step)):
                        groups.append(result[start:stop])
                        start = stop
                    groups.append(result[start:])
                    result = separator.join(groups)

        return datatypes.string(result)
Example #29
0
 def compile_as_string(self, compiler):
     """
     Emit a LOAD_CONST instruction carrying the string form of this
     literal.  If the conversion raises ValueError the empty string
     constant is loaded instead.
     """
     try:
         constant = datatypes.string(self._literal)
     except ValueError:
         # The literal has no usable string form; substitute the empty string.
         constant = datatypes.string.EMPTY
     compiler.emit('LOAD_CONST', constant)
Example #30
0
def replace_function(context, source, pattern, flags, repl):
    """
    regexp:replace - rewrite the portions of a string matched by a regular
    expression.

    Every argument is evaluated as a string against `context`:
      1. source  - the string in which matches are looked up and replaced
      2. pattern - the regular expression (Javascript syntax according to
                   EXSLT; compiled here by Python's `re` module)
      3. flags   - character flags; a flag is on when its character occurs:
                     g: global replace - all occurrences are replaced,
                        otherwise only the first occurrence is
                     i: case insensitive - otherwise the match is case
                        sensitive
      4. repl    - the string that takes the place of each match

    The result is returned as a datatypes.string.
    """
    text = source.evaluate_as_string(context)
    expression = pattern.evaluate_as_string(context)
    options = flags.evaluate_as_string(context)
    substitute = repl.evaluate_as_string(context)

    ignore_case = 'i' in options
    replace_all = 'g' in options

    matcher = re.compile(expression, re.IGNORECASE if ignore_case else 0)
    # RE.sub() interprets a count of zero as "replace all"
    limit = 0 if replace_all else 1
    return datatypes.string(matcher.sub(substitute, text, limit))
Example #31
0
def align_function(context, target, padding, alignment=None):
    """
    The str:align function aligns a string within another string.

    `target` and `padding` (and `alignment`, when given) are evaluated as
    strings against `context`.  The target replaces a range of characters of
    the padding string; `alignment` selects which range: 'right', 'center',
    or anything else (including no third argument) for left alignment.

    Returns the aligned text as a datatypes.string.

    See http://exslt.org/str/functions/align/str.align.html for further
    explanation.
    """
    target = target.evaluate_as_string(context)
    padding = padding.evaluate_as_string(context)
    alignment = alignment and alignment.evaluate_as_string(context)

    target_len = len(target)
    padding_len = len(padding)

    # If the target string is longer than the padding string, then it is
    # truncated to be the same length as the padding string and returned.
    if target_len > padding_len:
        result = target[:padding_len]
    # If no third argument is given or if it is not one of 'left', 'right'
    # or 'center', then it defaults to left alignment.
    elif alignment == 'right':
        # Use an explicit non-negative end index: with an empty target,
        # padding[:-0] would be padding[:0] and lose the whole padding.
        result = padding[:padding_len - target_len] + target
    elif alignment == 'center':
        # With center alignment, the range of characters replaced by the target
        # string is in the middle of the padding string, such that either the
        # number of unreplaced characters on either side of the range is the
        # same or there is one less on the left than there is on the right.
        # Floor division keeps the slice index an integer.
        left = (padding_len - target_len) // 2
        right = left + target_len
        result = padding[:left] + target + padding[right:]
    else:
        result = target + padding[target_len:]
    return datatypes.string(result)
Example #32
0
def test_concat_function():
    """concat() of a node-set and two string literals yields one XPath string."""
    concat_args = [
        nodeset_literal([CHILD1]),
        string_literal('"3.14"'),
        string_literal('"Hi"'),
    ]
    call = function_call('concat', concat_args)
    result = call.evaluate_as_string(CONTEXT1)
    assert isinstance(result, datatypes.string)
    expected = datatypes.string('\n    \n    \n    Text1\n  3.14Hi')
    assert result == expected, (result, expected)
Example #33
0
def align_function(context, target, padding, alignment=None):
    """
    The str:align function aligns a string within another string.

    `target` and `padding` (and `alignment`, when supplied) are evaluated as
    strings against `context`.  A range of the padding string is replaced by
    the target; `alignment` chooses the range: "right", "center", or left
    alignment for any other value (including a missing third argument).

    Returns the aligned text as a datatypes.string.

    See http://exslt.org/str/functions/align/str.align.html for further
    explanation.
    """
    target = target.evaluate_as_string(context)
    padding = padding.evaluate_as_string(context)
    alignment = alignment and alignment.evaluate_as_string(context)

    # Number of padding characters that remain visible; negative when the
    # target is longer than the padding.
    spare = len(padding) - len(target)

    # If the target string is longer than the padding string, then it is
    # truncated to be the same length as the padding string and returned.
    if spare < 0:
        result = target[: len(padding)]
    # If no third argument is given or if it is not one of 'left', 'right'
    # or 'center', then it defaults to left alignment.
    elif alignment == "right":
        # Slice with a non-negative end index: padding[:-len(target)] would
        # collapse to padding[:0] for an empty target and drop the padding.
        result = padding[:spare] + target
    elif alignment == "center":
        # With center alignment, the range of characters replaced by the target
        # string is in the middle of the padding string, such that either the
        # number of unreplaced characters on either side of the range is the
        # same or there is one less on the left than there is on the right.
        # Floor division guarantees an integer slice index.
        left = spare // 2
        right = left + len(target)
        result = padding[:left] + target + padding[right:]
    else:
        result = target + padding[len(target) :]
    return datatypes.string(result)
Example #34
0
 def compile_as_string(self, compiler):
     """
     Emit a LOAD_CONST instruction whose operand is this literal converted
     to an XPath string; a ValueError during conversion selects the empty
     string constant instead.
     """
     try:
         constant = datatypes.string(self._literal)
     except ValueError:
         # Conversion failed; load the shared empty string.
         constant = datatypes.EMPTY_STRING
     compiler.emit('LOAD_CONST', constant)
Example #35
0
def test_normalize_space_function():
    """normalize-space() collapses runs of spaces and tabs to single spaces."""
    messy = string_literal('"Ht    	 There	   Mike"')
    call = function_call('normalize-space', [messy])
    result = call.evaluate_as_string(CONTEXT1)
    assert isinstance(result, datatypes.string)
    expected = datatypes.string(u'Ht There Mike')
    assert result == expected, (result, expected)
Example #36
0
File: en.py Project: mredar/amara
    def _format(self, number, token, letter_value, separator, grouping):
        """
        Format `number` as text as directed by the format `token`.

        'I' / 'i' produce roman numerals (unless `letter_value` is
        'alphabetic'), 'A' / 'a' produce alphabetic numbering, and every
        other token produces arabic digits zero-padded to the length of the
        token.  A token that does not end in '1' is unsupported and handled
        as if it were '1'.

        When `separator` and `grouping` are both set, arabic digits are
        split from the right into groups of `grouping` characters and joined
        with `separator`.

        Returns the formatted text as a datatypes.string.
        """
        if token in ('I', 'i') and letter_value != 'alphabetic':
            # roman numerals
            if 0 < number < self._roman_max:
                result = []
                # NOTE(review): with the strict `>` a bound equal to the
                # remaining number is skipped; confirm against the layout of
                # self._roman_digits before changing it.
                for bound, digits in self._roman_digits[token == 'i']:
                    if number > bound:
                        index, number = divmod(number, bound)
                        result.append(digits[index])
                result = u''.join(result)
            else:
                # not representable as a roman numeral; use plain digits
                result = '%d' % number
        elif token in ('A', 'a'):
            # alphabetic numbering
            alphabet = ASCII_LOWER if token == 'a' else ASCII_UPPER
            result = self._alpha_sequence(number, alphabet)
        else:
            # arabic numerals
            if token[-1:] != '1':
                # unsupported format token, using '1'
                token = '1'
            result = '%0*d' % (len(token), number)
            if separator and grouping:
                # Negative offsets count from the right-hand end, which is
                # where digit grouping starts.
                start = -len(result)
                step = -grouping
                if start < step:
                    groups = []
                    for stop in reversed(xrange(step, start, step)):
                        groups.append(result[start:stop])
                        start = stop
                    groups.append(result[start:])
                    result = separator.join(groups)

        return datatypes.string(result)
Example #37
0
 def __unicode__(self):
     '''
     Give back, as a Unicode object, the text held by this node and by
     its descendants (if it has any).

     This matches the XPath string() conversion of the node.
     '''
     string_value = datatypes.string(self)
     return unicode(string_value)
Example #38
0
 def evaluate_as_number(self, context):
     """
     Return, as a datatypes.number, the length of a string: the string
     value of the single argument, or of the context node when the
     argument is None.
     """
     (arg0,) = self._args
     if arg0 is not None:
         text = arg0.evaluate_as_string(context)
     else:
         # No argument supplied: measure the context node's string value.
         text = datatypes.string(context.node)
     return datatypes.number(len(text))
Example #39
0
def date_time_function(context):
    """
    Implementation of `date:date-time` (version 1): the current local
    date/time, with a time zone, as an ISO 8601 formatted string.

    The value comes from `_datetime.now()` converted to a datatypes.string.
    """
    current = _datetime.now()
    return datatypes.string(current)
Example #40
0
 def instantiate(self, context):
     """
     Evaluate the `select` expression and write its value to the output:
     each node of a node-set is copied, any other value is written as
     text after conversion to a string.
     """
     context.namespaces = self.namespaces
     value = self.select.evaluate(context)
     if not isinstance(value, datatypes.nodeset):
         context.text(datatypes.string(value))
         return
     for member in value:
         context.copy_node(member)
Example #41
0
 def evaluate_as_string(self, context):
     """
     Look up the unparsed entity named by the single argument in the
     root of the context node and return its URI as a string; an unknown
     name gives the empty string.
     """
     (name_expr,) = self._args
     entity_name = name_expr.evaluate_as_string(context)
     try:
         entity_uri = context.node.xml_root.xml_unparsed_entities[entity_name]
     except KeyError:
         # No unparsed entity is declared under that name.
         result = datatypes.EMPTY_STRING
     else:
         result = datatypes.string(entity_uri)
     return result
Example #42
0
 def evaluate_as_string(self, context):
     """
     Return the URI registered for an unparsed entity.

     The single argument is evaluated as the entity name and looked up in
     `xml_unparsed_entities` of the context node's root; when the name is
     not present the empty string is returned.
     """
     (arg0,) = self._args
     key = arg0.evaluate_as_string(context)
     try:
         found = context.node.xml_root.xml_unparsed_entities[key]
     except KeyError:
         found = None
         missing = True
     else:
         missing = False
     if missing:
         return datatypes.EMPTY_STRING
     return datatypes.string(found)
Example #43
0
    def evaluate_as_string(self, context):
        """
        Return the name of a node as a string.

        The node is the first member of the node-set argument, or the
        context node when the argument is None.  Elements and attributes
        report their qualified name, processing instructions their target
        and namespace nodes their name.  An empty node-set or any other
        kind of node gives the empty string.
        """
        (arg0,) = self._args
        if arg0 is not None:
            candidates = arg0.evaluate_as_nodeset(context)
            if not candidates:
                return datatypes.EMPTY_STRING
            node = candidates[0]
        else:
            node = context.node

        if isinstance(node, (tree.element, tree.attribute)):
            name = node.xml_qname
        elif isinstance(node, tree.processing_instruction):
            name = node.xml_target
        elif isinstance(node, tree.namespace):
            name = node.xml_name
        else:
            # Other node kinds have no name to report.
            return datatypes.EMPTY_STRING
        return datatypes.string(name)
Example #44
0
 def instantiate(self, context):
     """
     Output the value of the `select` expression: a node-set has each of
     its nodes copied, anything else is emitted as text in its string
     form.
     """
     context.namespaces = self.namespaces
     selected = self.select.evaluate(context)
     is_nodeset = isinstance(selected, datatypes.nodeset)
     if is_nodeset:
         for item in selected:
             context.copy_node(item)
         return
     context.text(datatypes.string(selected))
Example #45
0
    def evaluate_as_string(self, context):
        """
        Return the local name of a node as a string.

        The node is the first member of the node-set argument, or the
        context node when the argument is None.  Elements and attributes
        report their local name, processing instructions their target and
        namespace nodes their name.  An empty node-set or any other kind of
        node gives the empty string.
        """
        (arg0,) = self._args
        if arg0 is not None:
            candidates = arg0.evaluate_as_nodeset(context)
            if not candidates:
                return datatypes.EMPTY_STRING
            node = candidates[0]
        else:
            node = context.node

        if isinstance(node, (tree.element, tree.attribute)):
            name = node.xml_local
        elif isinstance(node, tree.processing_instruction):
            name = node.xml_target
        elif isinstance(node, tree.namespace):
            name = node.xml_name
        else:
            # Other node kinds have no local name to report.
            return datatypes.EMPTY_STRING
        return datatypes.string(name)
Example #46
0
 def evaluate_as_string(self, context):
     """
     Return the part of the first argument's string value that precedes
     the first occurrence of the second argument's string value, or the
     empty string when the second string does not occur in the first.
     """
     outer, inner = self._args
     haystack = outer.evaluate_as_string(context)
     needle = inner.evaluate_as_string(context)
     position = haystack.find(needle)
     if position != -1:
         return datatypes.string(haystack[:position])
     return datatypes.EMPTY_STRING
Example #47
0
def date_time_function(context):
    """
    `date:date-time`, version 1: gives the current local date/time as an
    ISO 8601 formatted date/time string that includes a time zone.

    Built by converting `_datetime.now()` to a datatypes.string.
    """
    timestamp = _datetime.now()
    return datatypes.string(timestamp)
Example #48
0
        def handle_element(elem, resource):
            """
            Generator that walks `elem` and its child elements and yields
            (resource, relationship, value) tuples driven by the expressions
            stored on each element's `xml_model`.

            `resource` is the identifier of the resource currently in scope;
            it is replaced by a new identifier when the element is marked as
            a resource envelope, and the (possibly replaced) value is passed
            down to the child elements.  Resource and relationship are
            yielded as unicode; the value is yielded as evaluated.

            If the element has a `metadata_context_expr` that evaluates to a
            false value, nothing is yielded for the element or its children.
            """
            new_resource = None
            # Namespace prefix mapping of the root's model, used for every XPath evaluation below
            prefixes = elem.xml_root.xml_model.prefixes
            if elem.xml_model.metadata_context_expr:
                if not elem.xml_model.metadata_context_expr.evaluate(context(elem, namespaces=prefixes)):
                    return
            #Is there a cue that designates this element as a resource envelope?
            if elem.xml_model.metadata_resource_expr:
                if elem.xml_model.metadata_resource_expr == NODE_ID_MARKER:
                    #FIXME: Isn't going from unicode -> xpath str -> unicode wasteful?
                    new_resource = unicode(datatypes.string(elem.xml_nodeid))
                else:
                    new_resource = unicode(datatypes.string(elem.xml_model.metadata_resource_expr.evaluate(context(elem, namespaces=prefixes))))
            #Is there a cue that designates a relationship in this element?
            if elem.xml_model.metadata_rel_expr:
                #Execute the XPath to get the relationship name/title
                rel = datatypes.string(elem.xml_model.metadata_rel_expr.evaluate(context(elem, namespaces=prefixes)))
                if elem.xml_model.metadata_value_expr:
                    #Execute the XPath to get the relationship value
                    val = elem.xml_model.metadata_value_expr.evaluate(context(elem, namespaces=prefixes))
                elif new_resource is not None:
                    #If the element is also a resource envelope, the default value is the new resource ID
                    val = new_resource
                else:
                    #Handle the default ak:value of "."
                    val = datatypes.nodeset([elem])
                #This relationship is still reported against the incoming resource; the switch to new_resource happens below
                yield (unicode(resource), unicode(rel), val)
                #Basically expandqname first
                #prefix, local = splitqname(rattr)
                #try:
                #    ns = elem.xml_namespaces[prefix]
                #    resource = ns + local
                #except KeyError:
                #    resource = rattr
            if new_resource is not None: resource = new_resource

            #Additional relationship/value expression pairs, evaluated relative to this element
            for rel_expr, val_expr in elem.xml_model.other_rel_exprs:
                rel = datatypes.string(elem.xml_select(rel_expr, prefixes=prefixes))
                val = elem.xml_select(val_expr, prefixes=prefixes)
                yield (unicode(resource), unicode(rel), val)
            
            #Recurse into the child elements, handing down the resource now in scope
            for child in elem.xml_elements:
                for item in handle_element(child, resource):
                    yield item
            return
Example #49
0
def test_concat_function():
    """Check concat() over a node-set followed by two string literals."""
    node_arg = nodeset_literal([CHILD1])
    number_text_arg = string_literal('"3.14"')
    greeting_arg = string_literal('"Hi"')
    call = function_call('concat', [node_arg, number_text_arg, greeting_arg])
    result = call.evaluate_as_string(CONTEXT1)
    assert isinstance(result, datatypes.string)
    expected = datatypes.string('\n    \n    \n    Text1\n  3.14Hi')
    assert result == expected, (result, expected)
Example #50
0
def concat_function(context, nodeset):
    """
    Implementation of str:concat: evaluate the argument as a node set and
    return the string values of its nodes joined together, in order.  An
    empty node set produces an empty string.
    """
    nodes = nodeset.evaluate_as_nodeset(context)
    pieces = [datatypes.string(node) for node in nodes]
    joined = u"".join(pieces)
    return datatypes.string(joined)
Example #51
0
def concat_function(context, nodeset):
    """
    str:concat - concatenate the string values of every node in a node set.

    The argument is evaluated as a node set against `context`; the result
    is a datatypes.string, empty when the node set has no members.
    """
    members = nodeset.evaluate_as_nodeset(context)
    string_values = []
    for member in members:
        string_values.append(datatypes.string(member))
    return datatypes.string(u''.join(string_values))
Example #52
0
def test_substring_function():
    """Run the 'substring' call over a table of (arguments, expected) cases."""
    source = string_literal('"12345"')
    cases = (
        ([string_literal('"3.14Hi"'), string_literal('"3.14"')],
         datatypes.string('14Hi')),
        ([string_literal('"3.14Hi"'), string_literal('"3.14"'), number_literal('1')],
         datatypes.string('1')),
        ([string_literal('"12345"'), number_literal('2'), number_literal('3')],
         datatypes.string('234')),
        ([string_literal('"12345"'), number_literal('2')],
         datatypes.string('2345')),
        # fractional start and length are rounded
        ([string_literal('"12345"'), number_literal('1.5'), number_literal('2.6')],
         datatypes.string('234')),
        ([string_literal('"12345"'), number_literal('0'), number_literal('3')],
         datatypes.string('12')),
        # NaN and infinite arguments
        ([string_literal('"12345"'), NOT_A_NUMBER, number_literal('3')],
         datatypes.string('')),
        ([string_literal('"12345"'), number_literal('1'), NOT_A_NUMBER],
         datatypes.string('')),
        ([string_literal('"12345"'), number_literal('-42'), POSITIVE_INFINITY],
         datatypes.string('12345')),
        ([string_literal('"12345"'), NEGATIVE_INFINITY, POSITIVE_INFINITY],
         datatypes.string('')),
    )
    for args, expected in cases:
        call = function_call('substring', args)
        result = call.evaluate_as_string(CONTEXT1)
        assert isinstance(result, datatypes.string)
        assert result == expected, (result, expected)
Example #53
0
    def evaluate_as_string(self, context):
        """
        Return a substring of the first argument's string value.

        The arguments are (string, start, length); `length` may be None.
        `start` is a 1-based position and both numbers are rounded the way
        the XPath round() function rounds, i.e. ties go toward positive
        infinity.  A non-finite start, a NaN or negative-infinite length, or
        an end position before the start all give the empty string; a
        missing or positive-infinite length returns everything from the
        start position to the end of the string.
        """
        # Local import keeps this method self-contained.
        import math

        string, start, length = self._args
        string = string.evaluate_as_string(context)
        start = start.evaluate_as_number(context)

        # start == NaN: spec doesn't say; assume no substring to return
        # start == +Inf or -Inf: no substring to return
        if not start.isfinite():
            return datatypes.EMPTY_STRING

        # start is finite, safe for int() and floor().
        # XPath rounds halves toward positive infinity (round(-0.5) is 0),
        # which Python's round() does not do; floor(x + 0.5) does.
        start = int(math.floor(start + 0.5))
        # convert to 0-based index for python string slice
        if start < 1:
            startidx = 0
        else:
            startidx = start - 1

        # length undefined: return chars startidx to end
        if length is None:
            return datatypes.string(string[startidx:])

        length = length.evaluate_as_number(context)
        if not length.isfinite():
            # length == +Inf: return chars startidx to end
            if length > 0:
                assert length.isinf()
                return datatypes.string(string[startidx:])
            # length == NaN: spec doesn't say; assume no substring to return
            # length == -Inf: no substring to return
            return datatypes.EMPTY_STRING

        # length is finite, safe for int() and floor(); same XPath rounding
        # rule as for start.
        length = int(math.floor(length + 0.5))

        # return value must end before position (start+length)
        # which is (start+length-1) in 0-based index
        endidx = start + length - 1
        if endidx < startidx:
            return datatypes.EMPTY_STRING
        return datatypes.string(string[startidx:endidx])
Example #54
0
 def evaluate(self, context):
     """
     Return the value of the system property named by the single argument.

     The argument is evaluated as a string and expanded to a
     (namespace, local name) pair.  Recognized properties:

       XSL namespace:       version (number 1), vendor, vendor-url
       extension namespace: version, platform

     Any other property gives the empty string.

     Raises NotImplementedError for the extension `tempdir` property and
     for any property in the env-system-property namespace, neither of
     which is implemented.
     """
     arg0, = self._args
     arg0 = arg0.evaluate_as_string(context)
     namespace, property = context.expand_qname(arg0)
     if namespace == XSL_NAMESPACE:
         if property == 'version':
             return datatypes.number(1)
         elif property == 'vendor':
             return datatypes.string('Amara')
         elif property == 'vendor-url':
             return datatypes.string('http://hg.4suite.org/amara')
     elif namespace == EXTENSION_NAMESPACE:
         if property == 'version':
             return datatypes.string(__version__)
         elif property == 'platform':
             return datatypes.string(sys.platform)
         elif property == 'tempdir':
             # A bare `raise` has no active exception to re-raise here;
             # fail with an explicit, descriptive error instead.
             raise NotImplementedError(
                 "system property 'tempdir' is not implemented")
     elif namespace == 'http://xmlns.4suite.org/xslt/env-system-property':
         raise NotImplementedError(
             'environment system properties are not implemented')
     return datatypes.EMPTY_STRING
Example #55
0
    def instantiate(self, context):
        """
        Evaluate the `_select` expression and write its value to the output.

        A single node is copied, a node-set has its nodes copied, and any
        other value is written as text after conversion to a string.  The
        context is first pointed at this instruction and its namespaces.
        """
        context.instruction = self
        context.namespaces = self.namespaces

        value = self._select.evaluate(context)
        if isinstance(value, tree.node):
            context.copy_node(value)
            return
        if isinstance(value, datatypes.nodeset):
            context.copy_nodes(value)
            return
        context.text(datatypes.string(value))