Example #1
0
def lexer_from_complex_type(t: UxsdComplex) -> str:
    """Emit the C++ lexer function(s) for the names appearing in an UxsdComplex.

    Up to two inline functions are generated, both taking ``const char *in``:

    * ``gtok_foo lex_node_foo(...)`` for child element names, emitted when the
      content model is an UxsdDfa or an UxsdAll;
    * ``atok_foo lex_attr_foo(...)`` for attribute names, emitted when the type
      has attributes.

    The matching code itself comes from triehash.gen_lexer_body, fed with
    (name, token) pairs. Each function ends with a ``throw`` reporting the
    unrecognized child/attribute name. An empty string is returned when
    neither function applies.
    """
    pieces = []

    if isinstance(t.content, (UxsdDfa, UxsdAll)):
        child_alphabet = []
        for child in t.content.children:
            token = "gtok_%s::%s" % (t.cpp, utils.to_token(child.name))
            child_alphabet.append((child.name, token))
        pieces.append(
            "inline gtok_%s lex_node_%s(const char *in){\n" % (t.cpp, t.cpp))
        pieces.append(utils.indent(triehash.gen_lexer_body(child_alphabet)))
        pieces.append(
            "\tthrow std::runtime_error(\"Found unrecognized child \" + std::string(in) + \" of <%s>.\");\n" % t.name)
        pieces.append("}\n")

    if t.attrs:
        attr_alphabet = []
        for attr in t.attrs:
            token = "atok_%s::%s" % (t.cpp, utils.to_token(attr.name))
            attr_alphabet.append((attr.name, token))
        pieces.append(
            "inline atok_%s lex_attr_%s(const char *in){\n" % (t.cpp, t.cpp))
        pieces.append(utils.indent(triehash.gen_lexer_body(attr_alphabet)))
        pieces.append(
            "\tthrow std::runtime_error(\"Found unrecognized attribute \" + std::string(in) + \" of <%s>.\");\n" % t.name)
        pieces.append("}\n")

    return "".join(pieces)
Example #2
0
def _gen_write_simple(t: UxsdSimple,
                      container: str,
                      attr_name: str = "") -> str:
    """Partial function to generate code which writes out a simple type.

    Args:
        t: The simple type to write. UxsdAtomic, UxsdEnum and UxsdUnion are
            handled.
        container: C++ expression holding the value to be written.
        attr_name: When non-empty, the value is written as an XML attribute
            (`` name="value"``). It is passed by _gen_write_attr so that we
            can generate squashed code like
            `os << "index=\"" << y_list.index << "\"";`.

    Returns:
        C++ statements streaming the value into ``os``. Atomics are streamed
        directly, enums go through their ``lookup_<name>`` table, and unions
        produce one ``if(<container>.tag == type_tag::X)`` guard per member
        type followed by the code writing that member's field.

    Raises:
        NotImplementedError: If ``t`` is none of the handled kinds.
    """
    if isinstance(t, UxsdAtomic):
        if attr_name:
            return "os << \" %s=\\\"\" << %s << \"\\\"\";\n" % (attr_name,
                                                                container)
        return "os << %s;\n" % container

    if isinstance(t, UxsdEnum):
        if attr_name:
            return "os << \" %s=\\\"\" << lookup_%s[(int)%s] << \"\\\"\";\n" % (
                attr_name, t.name, container)
        return "os << lookup_%s[(int)%s];\n" % (t.name, container)

    if isinstance(t, UxsdUnion):
        out = ""
        for m in t.member_types:
            out += "if(%s.tag == type_tag::%s)" % (container,
                                                   utils.to_token(m.cpp))
            # Recurse on the member type `m`, not on the union `t` itself:
            # passing `t` re-enters this branch forever and ends in a
            # RecursionError.
            out += utils.indent(
                _gen_write_simple(
                    m, container + "." + utils.to_union_field_name(m.cpp),
                    attr_name))
        return out

    raise NotImplementedError("I don't know how to write out %s." % t)
Example #3
0
def _gen_load_attrs(t: UxsdComplex) -> str:
    """Generate the attribute-loading part of a C++ ``load_foo`` function.

    The emitted code keeps a ``std::bitset`` (``astate``) with one bit per
    attribute of ``t``. It walks the attributes of ``root``, lexes each name
    with ``lex_attr_<t.cpp>``, throws on a duplicate, and loads the value into
    the matching ``out-><field>`` through _gen_load_simple.

    After the loop, ``astate`` is OR'd with a mask whose bits are set for the
    optional attributes; if the result is not all ones, ``attr_error`` is
    called with that bitset and the attribute token lookup table.

    ``t`` must have at least one attribute (asserted).
    """
    assert len(t.attrs) > 0
    attr_count = len(t.attrs)

    lines = [
        "std::bitset<%d> astate = 0;\n" % attr_count,
        "for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){\n",
        "\tatok_%s in = lex_attr_%s(attr.name());\n" % (t.cpp, t.cpp),
        "\tif(astate[(int)in] == 0) astate[(int)in] = 1;\n",
        "\telse throw std::runtime_error(\"Duplicate attribute \" + std::string(attr.name()) + \" in <%s>.\");\n" % t.name,
        "\tswitch(in){\n",
    ]

    # One `case` per attribute, loading attr.value() into the output struct.
    for attr in t.attrs:
        lines.append(
            "\tcase atok_%s::%s:\n" % (t.cpp, utils.to_token(attr.name)))
        load_code = _gen_load_simple(
            attr.type, "out->%s" % utils.checked(attr.name), "attr.value()")
        lines.append(utils.indent(load_code, 2))
        lines.append("\t\tbreak;\n")

    lines.append("\tdefault: break; /* Not possible. */\n")
    lines.append("\t}\n")
    lines.append("}\n")

    # The binary literal is written most-significant bit first, so the
    # per-attribute flags are reversed to put attribute 0 in the lowest bit.
    optional_bits = []
    for attr in t.attrs:
        optional_bits.append("1" if attr.optional else "0")
    optional_bits.reverse()
    mask = "".join(optional_bits)

    lines.append(
        "std::bitset<%d> test_astate = astate | std::bitset<%d>(0b%s);\n" % (
            attr_count, attr_count, mask))
    lines.append(
        "if(!test_astate.all()) attr_error(test_astate, atok_lookup_%s);\n" % t.cpp)
    return "".join(lines)
Example #4
0
def tokens_from_enum(t: UxsdEnum) -> str:
    """Return a C++ ``enum class`` declaration for the values of an UxsdEnum.

    The first enumerator is always ``UXSD_INVALID = 0``; the remaining ones
    are the enumeration values converted with utils.to_token, in order.
    """
    members = ["UXSD_INVALID = 0"]
    for value in t.enumeration:
        members.append(utils.to_token(value))
    return "enum class %s {%s};" % (t.cpp, ", ".join(members))
Example #5
0
def tokens_from_complex_type(t: UxsdComplex) -> str:
    """Generate one or two C++ enums of token values from an UxsdComplex.

    For each applicable section, an ``enum class`` of tokens and a matching
    ``const char *..._lookup_...[]`` table of the original names are emitted:

    * ``gtok_<t.cpp>`` / ``gtok_lookup_<t.cpp>`` from the child element names,
      when the content model is an UxsdDfa or an UxsdAll;
    * ``atok_<t.cpp>`` / ``atok_lookup_<t.cpp>`` from the attribute names,
      when the type has attributes.

    Every emitted declaration is terminated by a newline, so the two sections
    never run together on one line. An empty string is returned when neither
    section applies.
    """
    out = ""
    if isinstance(t.content, (UxsdDfa, UxsdAll)):
        enum_tokens = [utils.to_token(e.name) for e in t.content.children]
        lookup_tokens = ["\"%s\"" % e.name for e in t.content.children]
        out += "enum class gtok_%s {%s};\n" % (t.cpp, ", ".join(enum_tokens))
        # Trailing newline added for parity with the atok table below; without
        # it the atok enum was glued onto the end of this line.
        out += "const char *gtok_lookup_%s[] = {%s};\n" % (
            t.cpp, ", ".join(lookup_tokens))
    if t.attrs:
        enum_tokens = [utils.to_token(x.name) for x in t.attrs]
        lookup_tokens = ["\"%s\"" % x.name for x in t.attrs]
        out += "enum class atok_%s {%s};\n" % (t.cpp, ", ".join(enum_tokens))
        out += "const char *atok_lookup_%s[] = {%s};\n" % (
            t.cpp, ", ".join(lookup_tokens))
    return out
Example #6
0
def _gen_load_union(t: UxsdUnion, container: str, input: str) -> str:
    """Generate C++ code which tries to load a string into a tagged union.

    For every member type, in order, the emitted code sets
    ``<container>.tag`` to that member's type tag and attempts to load the
    value into the member's union field:

    * atomic members use the member's ``cpp_load_format`` and are considered
      loaded when ``errno == 0``;
    * enum members call ``lex_<enum>(<input>, false)`` and are considered
      loaded when the result is not ``UXSD_INVALID``.

    On success the emitted code executes ``break``; if no member matches it
    throws ``std::runtime_error``.

    NOTE(review): the emitted ``break`` statements presumably rely on the
    caller wrapping this code in a loop or switch - confirm at the call site.

    Args:
        t: The union type to load.
        container: C++ expression naming the union object to fill.
        input: C++ expression yielding the string to parse.

    Raises:
        NotImplementedError: If a member type is neither atomic nor enum.
    """
    out = ""
    for m in t.member_types:
        new_container = "%s.%s" % (container, utils.to_union_field_name(m.cpp))
        out += "%s.tag = type_tag::%s;\n" % (container, utils.to_token(m.cpp))
        if isinstance(m, UxsdAtomic):
            out += "%s = %s;\n" % (new_container, (m.cpp_load_format % input))
            out += "if(errno == 0)\n"
            out += "\tbreak;\n"
        elif isinstance(m, UxsdEnum):
            out += "%s = lex_%s(%s, false);\n" % (new_container, m.cpp, input)
            out += "if(%s != %s::UXSD_INVALID)\n" % (new_container, m.cpp)
            # Indented like the atomic branch so the generated `if` bodies
            # are formatted consistently.
            out += "\tbreak;\n"
        else:
            raise NotImplementedError(
                "I don't know how to load %s into a union." % m)
    out += "throw std::runtime_error(\"Couldn't load a suitable value into union %s.\");\n" % t.name
    return out
Example #7
0
def _gen_load_dfa(t: UxsdComplex) -> str:
    """Generate the child-element validation and loading part of a C++
    ``load_foo`` function for a content model backed by a DFA (UxsdDfa).

    The emitted code starts in the DFA's start state and, for every child
    node of ``root``:

    1. lexes the node name with ``lex_node_<t.cpp>``;
    2. looks up the next state in the ``gstate_<t.cpp>`` table and calls
       ``dfa_error`` when the entry is -1 (invalid transition);
    3. moves to the next state and loads the element via _gen_load_element.

    After the loop, ``dfa_error`` is called with "end of input" if the final
    state is not one of the DFA's accepting states.

    ``t.content`` must be an UxsdDfa (asserted).
    """
    assert isinstance(t.content, UxsdDfa)
    automaton = t.content.dfa
    alphabet_size = len(automaton.alphabet)

    chunks = [
        "int next, state=%d;\n" % automaton.start,
        "for(pugi::xml_node node = root.first_child(); node; node = node.next_sibling()){\n",
        "\tgtok_%s in = lex_node_%s(node.name());\n" % (t.cpp, t.cpp),
        "\tnext = gstate_%s[state][(int)in];\n" % t.cpp,
        "\tif(next == -1)\n",
        "\t\tdfa_error(gtok_lookup_%s[(int)in], gstate_%s[state], gtok_lookup_%s, %d);\n"
        % (t.cpp, t.cpp, t.cpp, alphabet_size),
        "\tstate = next;\n",
        "\tswitch(in){\n",
    ]

    # One `case` per possible child element.
    for child in t.content.children:
        chunks.append(
            "\tcase gtok_%s::%s:\n" % (t.cpp, utils.to_token(child.name)))
        chunks.append(utils.indent(_gen_load_element(child, "out"), 2))
        chunks.append("\t\tbreak;\n")

    chunks.append("\tdefault: break; /* Not possible. */\n")
    chunks.append("\t}\n")
    chunks.append("}\n")

    # Input is rejected when the final state differs from every accepting one.
    not_accepting = " && ".join(
        "state != %d" % accepted for accepted in automaton.accepts)
    chunks.append(
        "if(%s) dfa_error(\"end of input\", gstate_%s[state], gtok_lookup_%s, %d);\n"
        % (not_accepting, t.cpp, t.cpp, alphabet_size))

    return "".join(chunks)
Example #8
0
def lexer_from_enum(t: UxsdEnum) -> str:
    """Emit a C++ function converting a ``const char *`` into a value of the
    enum generated from an UxsdEnum.

    The generated signature is
    ``<t.cpp> lex_<t.cpp>(const char *in, bool throw_on_invalid)``. The
    matching code comes from triehash.gen_lexer_body. After it, the function
    throws ``std::runtime_error`` if ``throw_on_invalid`` is set and otherwise
    returns ``<t.cpp>::UXSD_INVALID``.

    ``throw_on_invalid`` exists for reading unions: an unrecognized value must
    not throw while a union member is merely being tried, but must throw
    everywhere else.
    """
    alphabet = []
    for value in t.enumeration:
        alphabet.append((value, "%s::%s" % (t.cpp, utils.to_token(value))))

    signature = "inline %s lex_%s(const char *in, bool throw_on_invalid){\n" % (
        t.cpp, t.cpp)
    body = utils.indent(triehash.gen_lexer_body(alphabet))
    fallback = (
        "\tif(throw_on_invalid)\n"
        + "\t\tthrow std::runtime_error(\"Found unrecognized enum value \" + std::string(in) + \" of %s.\");\n" % t.cpp
        + "\treturn %s::UXSD_INVALID;\n" % t.cpp
    )
    return signature + body + fallback + "}\n"
Example #9
0
def _gen_load_all(t: UxsdComplex) -> str:
    """Generate the child-element validation and loading part of a C++
    ``load_foo`` function for an xs:all content model (UxsdAll).

    The emitted code keeps a ``std::bitset`` (``gstate``) with one bit per
    possible child element. It walks the children of ``root``, lexes each
    name with ``lex_node_<t.cpp>``, throws on a duplicate element, and loads
    the element via _gen_load_element.

    After the loop, ``gstate`` is OR'd with a mask whose bits are set for the
    optional elements; if the result is not all ones, ``all_error`` is called
    with that bitset and the element token lookup table.

    ``t.content`` must be an UxsdAll (asserted).
    """
    assert isinstance(t.content, UxsdAll)
    child_count = len(t.content.children)

    lines = [
        "std::bitset<%d> gstate = 0;\n" % child_count,
        "for(pugi::xml_node node = root.first_child(); node; node = node.next_sibling()){\n",
        "\tgtok_%s in = lex_node_%s(node.name());\n" % (t.cpp, t.cpp),
        "\tif(gstate[(int)in] == 0) gstate[(int)in] = 1;\n",
        "\telse throw std::runtime_error(\"Duplicate element \" + std::string(node.name()) + \" in <%s>.\");\n" % t.name,
        "\tswitch(in){\n",
    ]

    # One `case` per possible child element.
    for child in t.content.children:
        lines.append(
            "\tcase gtok_%s::%s:\n" % (t.cpp, utils.to_token(child.name)))
        lines.append(utils.indent(_gen_load_element(child, "out"), 2))
        lines.append("\t\tbreak;\n")

    lines.append("\tdefault: break; /* Not possible. */\n")
    lines.append("\t}\n")
    lines.append("}\n")

    # The binary literal is written most-significant bit first, so the
    # per-element flags are reversed to put element 0 in the lowest bit.
    optional_bits = []
    for child in t.content.children:
        optional_bits.append("1" if child.optional else "0")
    optional_bits.reverse()
    mask = "".join(optional_bits)

    lines.append(
        "std::bitset<%d> test_gstate = gstate | std::bitset<%d>(0b%s);\n" % (
            child_count, child_count, mask))
    lines.append(
        "if(!test_gstate.all()) all_error(test_gstate, gtok_lookup_%s);\n" % t.cpp)

    return "".join(lines)
Example #10
0
def to_capnpcase(x: str) -> str:
    """Convert a name to a single concatenated word, one capital per segment.

    The name is first normalized with utils.to_token, then split on
    underscores. Each segment keeps its first character as-is and has the
    rest lowercased, and the segments are concatenated without a separator.
    (Presumably utils.to_token yields upper-case tokens, which makes the
    result CamelCase - confirm against utils.)

    Empty segments, produced by leading, trailing or consecutive underscores
    or by an empty token, contribute nothing instead of raising IndexError.
    """
    x = utils.to_token(x)  # normalize
    # w[:1] rather than w[0]: slicing an empty segment yields "" where
    # indexing would raise IndexError.
    return "".join(w[:1] + w[1:].lower() for w in x.split("_"))
Example #11
0
def render_header_file(schema: UxsdSchema, cmdline: str,
                       input_file: str) -> str:
    """Render the generated C++ header file and return it as a string.

    The header is assembled from these sections, in order:

    * the header comment template, filled with the generator version, the
      command line, the input file name and its md5 (via utils.md5);
    * the includes and the collapsed-vector / char-pool definition templates;
    * the opening of ``namespace uxsd``;
    * forward declarations for all union and complex-type structs;
    * ``extern`` declarations of the per-type pools, plus the char pool when
      the schema has strings;
    * declarations of ``clear_pools`` and, when the schema has strings,
      ``clear_strings``;
    * enum token definitions, the ``type_tag`` enum (if any unions use simple
      types), union structs (if any), complex-type structs and root element
      classes;
    * the closing of the namespace.
    """
    header_fields = {
        "version": __version__,
        "cmdline": cmdline,
        "input_file": input_file,
        "md5": utils.md5(input_file),
    }
    parts = [
        cpp_templates.header_comment.substitute(header_fields),
        cpp_templates.includes,
        cpp_templates.collapsed_vec_defn,
        cpp_templates.char_pool_defn,
        "\n/* All uxsdcxx functions and structs live in this namespace. */\n",
        "namespace uxsd {",
        "\n\n/* Forward decl of generated data types. Needed for the pools.\n",
        " * The types are sorted according to tree height, so that the \"root type\"\n",
        " * appears last and we don't get any \"incomplete type\" errors. */\n",
    ]

    # Forward declarations: unions first, then complex types.
    forward_decls = []
    for union in schema.unions:
        forward_decls.append("struct %s;" % union.cpp)
    for complex_type in schema.complex_types:
        forward_decls.append("struct %s;" % complex_type.cpp)
    parts.append("\n".join(forward_decls))

    # Pool declarations.
    parts.append(
        "\n\n/* Global shared pools for storing multiply-occurring elements. */\n")
    pool_decls = [
        "extern std::vector <%s> %s_pool;" % (pooled.cpp, pooled.name)
        for pooled in schema.pool_types
    ]
    parts.append("\n".join(pool_decls))
    parts.append("\n")
    if schema.has_string:
        parts.append("\nextern char_pool_impl char_pool;\n")

    # Pool cleanup helpers.
    parts.append("\n/* Helper function for freeing the pools. */\n")
    parts.append("void clear_pools(void);")
    if schema.has_string:
        parts.append(
            "\n/* One may want to use the allocated strings after loading, so this\n")
        parts.append(" * function is provided separately. */\n")
        parts.append("void clear_strings(void);")

    # Enumerations.
    parts.append("\n\n/* Enum tokens generated from XSD enumerations. */\n")
    parts.append("\n".join(tokens_from_enum(enum) for enum in schema.enums))

    # Tagged-union support, only when there is something to tag.
    tag_names = [
        utils.to_token(simple.cpp) for simple in schema.simple_types_in_unions
    ]
    if tag_names:
        parts.append("\n\n/* Type tag enum for tagged unions. */\n")
        parts.append("enum class type_tag {%s};\n" % ", ".join(tag_names))

    union_structs = [typedefn_from_union(union) for union in schema.unions]
    if union_structs:
        parts.append("\n\n/* Structs generated from  XSD unions. */\n")
        parts.append("\n\n".join(union_structs))

    # Complex types and root elements.
    complex_structs = [
        typedefn_from_complex_type(complex_type)
        for complex_type in schema.complex_types
    ]
    parts.append("\n\n/* Structs generated from complex types. */\n\n")
    parts.append("\n\n".join(complex_structs))

    root_classes = [
        typedefn_from_root_element(element)
        for element in schema.root_elements
    ]
    parts.append("\n\n/* Classes generated from root elements. */\n")
    parts.append("\n\n".join(root_classes))

    parts.append("\n} /* namespace uxsd */\n")
    return "".join(parts)