Example #1
0
 def __init__(self, root_tag, root_factory, **kwargs):
     """Initialise parsing state and create the expat parser.

     `root_tag` and `root_factory` are stored unchanged on the instance.
     `root_factory` must be callable, otherwise `SerializableAPIError` is
     raised. The optional keyword argument `encoding` is stored on
     `self.encoding` and, when it is not `None`, passed on to
     `expat.ParserCreate`. Other keyword arguments are not used here.
     """
     # The encoding is recorded before the factory is validated.
     self.encoding = kwargs.pop('encoding', None)
     if not callable(root_factory):
         raise SerializableAPIError("Factory not callable")

     self.root_tag, self.root_factory = root_tag, root_factory
     # Nothing has been parsed yet: no root, empty stack, empty data cache.
     self.root, self.stack, self.dcache = None, [], ''

     # Only hand an encoding to expat when one was explicitly requested.
     creation_args = () if self.encoding is None else (self.encoding,)
     self.parser = expat.ParserCreate(*creation_args)

     # Route the parser's element and character-data events to this object.
     self.parser.StartElementHandler = self.start_element_handler
     self.parser.EndElementHandler = self.end_element_handler
     self.parser.CharacterDataHandler = self.character_data_handler
Example #2
0
def xml_parse(xml_input,
              encoding=None,
              expat=expat,
              process_namespaces=False,
              namespace_separator=':',
              disable_entities=True,
              **kwargs):
    """Parse XML with an expat parser driven by a `_DictSAXHandler`.

    `xml_input` may be a text string, a byte string, or an object with a
    `read` attribute. Text strings are encoded with `encoding` (UTF-8 when
    no encoding is given) before parsing.

    `namespace_separator` is always passed to the handler, but the expat
    parser only receives it when `process_namespaces` is true. `expat` may
    be replaced by another module that provides `ParserCreate`.

    When `disable_entities` is true, DOCTYPE declarations are disallowed
    through `parser._reader` if the parser has one; otherwise a default
    handler that returns `None` and an external-entity handler that returns
    1 are installed on the parser.

    Remaining keyword arguments are forwarded to `_DictSAXHandler`.
    Returns the handler's `item` attribute after parsing.
    """
    sax_handler = _DictSAXHandler(namespace_separator=namespace_separator,
                                  **kwargs)

    if isinstance(xml_input, _unicode):
        # Text is converted to bytes first; UTF-8 is the fallback encoding.
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # The handler already has the separator; expat only gets it on request.
    expat_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, expat_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # The parser does not accept this attribute; carry on without it.
        pass

    xml_parser.StartNamespaceDeclHandler = sax_handler.startNamespaceDecl
    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    xml_parser.buffer_text = True

    if disable_entities:
        try:
            xml_parser._reader.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", True)
        except AttributeError:
            # No usable `_reader`: install handlers on the parser instead.
            def _discard(_data):
                return None

            def _accept_external_entity(*_args):
                return 1

            xml_parser.DefaultHandler = _discard
            xml_parser.ExternalEntityRefHandler = _accept_external_entity

    if not hasattr(xml_input, 'read'):
        xml_parser.Parse(xml_input, True)
    else:
        xml_parser.ParseFile(xml_input)

    return sax_handler.item
Example #3
0
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse XML input with expat and return what the SAX handler collected.

    `xml_input` may be a text string, a byte string, or a file-like object
    (anything with a `read` attribute). Text strings are encoded with
    `encoding`, defaulting to UTF-8, before being handed to the parser.

    `namespace_separator` is always given to the handler; the expat parser
    only receives it when `process_namespaces` is true. `expat` lets callers
    substitute another module that provides `ParserCreate`. Any other
    keyword arguments are forwarded to `_DictSAXHandler`.

    Returns the handler's `item` attribute once parsing has finished.
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(encoding, namespace_separator)
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    # Pick the entry point by inspecting the input rather than calling
    # ParseFile() and catching TypeError/AttributeError: those exceptions
    # can also be raised by handler callbacks while a real file is being
    # parsed, and retrying with Parse() on the file object would replace
    # the genuine error with an unrelated one.
    if hasattr(xml_input, 'read'):
        parser.ParseFile(xml_input)
    else:
        parser.Parse(xml_input, True)
    return handler.item
Example #4
0
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', disable_entities=True, **kwargs):
    """Convert XML input into a dictionary.

    `xml_input` is either a `string` or a file-like object.

    With `xml_attribs` set to `True`, element attributes are stored in the
    dictionary next to the regular child elements, prefixed with `@` so they
    cannot collide. With `False`, attributes are ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    With `item_depth` equal to `0` (the default behavior) the dictionary for
    the root element is returned. For any other value, `item_callback` is
    called each time an item at that depth is found and the function returns
    `None` at the end (streaming mode).

    The callback is given two parameters: the `path` from the document root
    to the item (name-attribs pairs) and the `item` itself (dict). A
    false-ish return value from the callback stops parsing with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    `postprocessor` is an optional function taking `path`, `key` and `value`
    as positional arguments and returning a new `(key, value)` pair in which
    either element may have been changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    An alternate version of `expat` (such as `defusedexpat`) can be supplied
    through the `expat` parameter. E.g.:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    `force_list` forces lists to be created even when a level of the
    hierarchy has only a single child. It is a tuple of keys: when the key
    for a level of the hierarchy is in `force_list`, that level gets a list
    as its child (even with only one sub-element). The index_keys operation
    takes precedence over this. It is applied after any user-supplied
    postprocessor has run.

        For example, given this input:
        <servers>
          <server>
            <name>host1</name>
            <os>Linux</os>
            <interfaces>
              <interface>
                <name>em0</name>
                <ip_address>10.0.0.1</ip_address>
              </interface>
            </interfaces>
          </server>
        </servers>

        Called with force_list=('interface',), it produces this dictionary:
        {'servers':
          {'server':
            {'name': 'host1',
             'os': 'Linux',
             'interfaces':
              {'interface':
                [{'name': 'em0', 'ip_address': '10.0.0.1'}]}}}}

        `force_list` may also be a callable receiving `path`, `key` and
        `value`, which helps when deciding whether to force a list takes
        more complex logic.

    When `disable_entities` is true (the default), the parser is configured
    so that entities are not expanded.
    """
    sax_handler = _DictSAXHandler(namespace_separator=namespace_separator,
                                  **kwargs)
    if isinstance(xml_input, _unicode):
        # Text must become bytes for the parser; UTF-8 is the fallback.
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # The handler keeps the separator; expat only sees it when namespaces
    # are being processed.
    expat_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, expat_separator)
    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    xml_parser.StartNamespaceDeclHandler = sax_handler.startNamespaceDecl
    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    xml_parser.buffer_text = True

    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            xml_parser._reader.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", True)
        except AttributeError:
            # For CPython / expat parser.
            def _discard(_data):
                # Anything not handled ends up here, so entities are not
                # expanded.
                return None

            def _accept_external_entity(*_args):
                # An integer is expected; zero means failure and leads to
                # expat.ExpatError.
                return 1

            xml_parser.DefaultHandler = _discard
            xml_parser.ExternalEntityRefHandler = _accept_external_entity

    if not hasattr(xml_input, 'read'):
        xml_parser.Parse(xml_input, True)
    else:
        xml_parser.ParseFile(xml_input)
    return sax_handler.item
def parse(xml_input,
          encoding=None,
          expat=expat,
          process_namespaces=False,
          namespace_separator=':',
          **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions.
    If set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every
    time an item at the specified depth is found and returns `None` in the
    end (streaming mode).

    The callback function receives two parameters: the `path` from the
    document root to the item (name-attribs pairs), and the `item` (dict).
    If the callback's return value is false-ish, parsing will be stopped
    with the :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new
    `(key, value)` pair where both `key` and `value` may have changed.
    Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g.:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    You can use the force_list argument to force lists to be created even
    when there is only a single child of a given level of hierarchy. The
    force_list argument is a tuple of keys. If the key for a given level
    of hierarchy is in the force_list argument, that level of hierarchy
    will have a list as a child (even if there is only one sub-element).
    The index_keys operation takes precedence over this. This is applied
    after any user-supplied postprocessor has already run.

        For example, given this input:
        <servers>
          <server>
            <name>host1</name>
            <os>Linux</os>
            <interfaces>
              <interface>
                <name>em0</name>
                <ip_address>10.0.0.1</ip_address>
              </interface>
            </interfaces>
          </server>
        </servers>

        If called with force_list=('interface',), it will produce
        this dictionary:
        {'servers':
          {'server':
            {'name': 'host1',
             'os': 'Linux',
             'interfaces':
              {'interface':
                [{'name': 'em0', 'ip_address': '10.0.0.1'}]}}}}

        `force_list` can also be a callable that receives `path`, `key` and
        `value`. This is helpful in cases where the logic that decides
        whether a list should be forced is more complex.
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)

    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)

    if not process_namespaces:
        namespace_separator = None

    parser = expat.ParserCreate(encoding, namespace_separator)

    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True

    # Pick the entry point by inspecting the input rather than calling
    # ParseFile() and catching TypeError/AttributeError: those exceptions
    # can also be raised by callbacks (item_callback, postprocessor) while a
    # real file is being parsed, and retrying with Parse() on the file
    # object would replace the genuine error with an unrelated one.
    if hasattr(xml_input, 'read'):
        parser.ParseFile(xml_input)
    else:
        parser.Parse(xml_input, True)

    return handler.item
Example #6
0
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', disable_entities=True,
          ordered_mixed_children=False, **kwargs):
    """Convert XML input into a dictionary.

    `xml_input` is either a `string` or a file-like object.

    With `xml_attribs` set to `True`, element attributes are stored in the
    dictionary next to the regular child elements, prefixed with `@` so they
    cannot collide. With `False`, attributes are ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    With `item_depth` equal to `0` (the default behavior) the dictionary for
    the root element is returned. For any other value, `item_callback` is
    called each time an item at that depth is found and the function returns
    `None` at the end (streaming mode).

    The callback is given two parameters: the `path` from the document root
    to the item (name-attribs pairs) and the `item` itself (dict). A
    false-ish return value from the callback stops parsing with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    `postprocessor` is an optional function taking `path`, `key` and `value`
    as positional arguments and returning a new `(key, value)` pair in which
    either element may have been changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    An alternate version of `expat` (such as `defusedexpat`) can be supplied
    through the `expat` parameter. E.g.:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    `force_list` forces lists to be created even when a level of the
    hierarchy has only a single child. It is a tuple of keys: when the key
    for a level of the hierarchy is in `force_list`, that level gets a list
    as its child (even with only one sub-element). The index_keys operation
    takes precedence over this. It is applied after any user-supplied
    postprocessor has run.

    For example, given this input:
    <servers>
      <server>
        <name>host1</name>
        <os>Linux</os>
        <interfaces>
          <interface>
            <name>em0</name>
            <ip_address>10.0.0.1</ip_address>
          </interface>
        </interfaces>
      </server>
    </servers>

    Called with force_list=('interface',), it produces this dictionary:
    {'servers':
      {'server':
        {'name': 'host1',
         'os': 'Linux',
         'interfaces':
          {'interface':
            [{'name': 'em0', 'ip_address': '10.0.0.1'}]}}}}

    `force_list` may also be a callable receiving `path`, `key` and `value`,
    which helps when deciding whether to force a list takes more complex
    logic.

    `ordered_mixed_children` makes the parser add an attribute with the key
    `@__order__` to each element in the data; its value corresponds to the
    element's processing order within the document. By default, mixed child
    elements are grouped by name and only keep their relative order.
    Sometimes the order matters, yet the system being worked with has no
    way of indicating it other than the order found in the document.

    For example, this input:
    <a>
      <b>1</b>
      <c>2</c>
      <b>3</b>
    </a>

    Would normally be parsed as:
    {"a": {"b": [1, 3], "c": 2}}

    This would then be unparsed as:
    <a>
      <b>1</b>
      <b>3</b>
      <c>2</c>
    </a>

    With `ordered_mixed_children=True`, the order information is included
    so that unparsing reproduces the original input (the `@__order__`
    attribute is removed).
    {"a": {
      "@__order__": 1,
      "b": ({"@__order__": 2, "#text": 1},
           {"@__order__": 3, "#text": 3}),
      "c": {"@__order__": 4, "#text": 2}
      }
    }
    """
    sax_handler = _DictSAXHandler(
        namespace_separator=namespace_separator,
        ordered_mixed_children=ordered_mixed_children,
        **kwargs
    )

    if isinstance(xml_input, _unicode):
        # Text must become bytes for the parser; UTF-8 is the fallback.
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # The handler keeps the separator; expat only sees it when namespaces
    # are being processed.
    expat_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, expat_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    xml_parser.StartNamespaceDeclHandler = sax_handler.startNamespaceDecl
    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    xml_parser.buffer_text = True

    if disable_entities:
        doctype_feature = "http://apache.org/xml/features/disallow-doctype-decl"
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            xml_parser._reader.setFeature(doctype_feature, True)
        except AttributeError:
            # For CPython / expat parser.
            def _discard(_data):
                # Anything not handled ends up here, so entities are not
                # expanded.
                return None

            def _accept_external_entity(*_args):
                # An integer is expected; zero means failure and leads to
                # expat.ExpatError.
                return 1

            xml_parser.DefaultHandler = _discard
            xml_parser.ExternalEntityRefHandler = _accept_external_entity

    if not hasattr(xml_input, 'read'):
        xml_parser.Parse(xml_input, True)
    else:
        xml_parser.ParseFile(xml_input)
    return sax_handler.item
Example #7
0
def parse(xml_input,
          encoding=None,
          expat=expat,
          process_namespaces=False,
          namespace_separator=':',
          disable_entities=True,
          **kwargs):
    """Parse XML through a `Parser` handler using a fixed `force_list`.

    `xml_input` may be a text string, a byte string, or an object with a
    `read` attribute. Text strings are encoded with `encoding` (UTF-8 when
    no encoding is given) before parsing.

    `namespace_separator` is always passed to the handler, but the expat
    parser only receives it when `process_namespaces` is true. `expat` may
    be replaced by another module that provides `ParserCreate`.

    When `disable_entities` is true, the parser is configured so that
    entities are not expanded.

    Remaining keyword arguments are forwarded to `Parser`. Because
    `force_list` is always supplied here, passing `force_list` again in
    `kwargs` raises `TypeError`.

    Returns the handler's `item` attribute after parsing.
    """
    # Keys handed to the handler as `force_list`; the list is rebuilt on
    # every call. NOTE(review): presumably these are element names (some in
    # dotted form) that must always come back as lists -- confirm against
    # `Parser`.
    force_list = [
        'domainSetInfo', 'evaluationCriteria',
        'inputInfoType_model.resourceType', 'relationInfo',
        'domainSetInfo.domainId', 'annotationInfo', 'validationInfo',
        'textClassificationInfo', 'telephoneNumber', 'requiredLRs',
        'annotationManual', 'metadataLanguageName', 'appropriatenessForDSI',
        'evaluationTool', 'operatingSystem', 'metadataLanguageId',
        'originalSource', 'documentation', 'segmentationLevel',
        'encodingLevel', 'variant', 'fundingCountryId', 'funder',
        'languageSetInfo', 'inputInfoType_model.annotationType',
        'outputInfoType_model.annotationType', 'affiliation', 'fundingType',
        'validator', 'identifier', 'theoreticModel', 'creationTool',
        'distributionInfo', 'licenceInfo', 'evaluationLevel',
        'sizePerLanguage', 'languageVarietyName', 'restrictionsOfUse',
        'domainSetInfo.domain', 'contactPerson', 'evaluationReport',
        'outputInfoType_model.resourceType', 'domainSetInfo.subdomainId',
        'evaluationMeasure', 'keywords', 'fundingCountry', 'url', 'author',
        'iprHolder', 'annotationTool', 'email', 'requiredSoftware',
        'domainInfo', 'languageVarietyInfo',
        'conformanceToStandardsBestPractices', 'characterEncodingInfo',
        'extratextualInformation', 'textFormatInfo', 'distributionMedium',
        'corpusTextInfo', 'implementationLanguage', 'publisher', 'externalRef',
        'languageInfo', 'resourceCreator', 'evaluator', 'executionLocation',
        'domainSetInfo.subdomain', 'samplesLocation', 'linguisticInformation',
        'function', 'fundingProject', 'downloadLocation', 'sizeInfo', 'editor',
        'task', 'extraTextualInformationUnit', 'metadataCreator',
        'validationReport', 'outputInfoType_model.mediaType'
    ]

    sax_handler = Parser(
        namespace_separator=namespace_separator,
        force_list=force_list,
        **kwargs
    )

    if isinstance(xml_input, xmltodict._unicode):
        # Text must become bytes for the parser; UTF-8 is the fallback.
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # The handler keeps the separator; expat only sees it when namespaces
    # are being processed.
    expat_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, expat_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    xml_parser.StartNamespaceDeclHandler = sax_handler.startNamespaceDecl
    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    xml_parser.buffer_text = True

    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            xml_parser._reader.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", True)
        except AttributeError:
            # For CPython / expat parser.
            def _discard(_data):
                # Anything not handled ends up here, so entities are not
                # expanded.
                return None

            def _accept_external_entity(*_args):
                # An integer is expected; zero means failure and leads to
                # expat.ExpatError.
                return 1

            xml_parser.DefaultHandler = _discard
            xml_parser.ExternalEntityRefHandler = _accept_external_entity

    if not hasattr(xml_input, 'read'):
        xml_parser.Parse(xml_input, True)
    else:
        xml_parser.ParseFile(xml_input)
    return sax_handler.item
Example #8
0
def parse(xml_input,
          encoding=None,
          expat=expat,
          process_namespaces=False,
          namespace_separator=':',
          disable_entities=True,
          process_comments=False,
          **kwargs):
    """Convert XML input into a dictionary.

    `xml_input` is a `string`, a file-like object, or a generator of
    strings.

    With `xml_attribs` set to `True`, element attributes are stored in the
    dictionary next to the regular child elements, prefixed with `@` so they
    cannot collide. With `False`, attributes are ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    With `item_depth` equal to `0` (the default behavior) the dictionary for
    the root element is returned. For any other value, `item_callback` is
    called each time an item at that depth is found and the function returns
    `None` at the end (streaming mode).

    The callback is given two parameters: the `path` from the document root
    to the item (name-attribs pairs) and the `item` itself (dict). A
    false-ish return value from the callback stops parsing with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    `postprocessor` is an optional function taking `path`, `key` and `value`
    as positional arguments and returning a new `(key, value)` pair in which
    either element may have been changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        {'a': {'b:int': [1, 2], 'b': 'x'}}

    An alternate version of `expat` (such as `defusedexpat`) can be supplied
    through the `expat` parameter. E.g.:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        {'a': 'hello'}

    `force_list` forces lists to be created even when a level of the
    hierarchy has only a single child. It is a tuple of keys: when the key
    for a level of the hierarchy is in `force_list`, that level gets a list
    as its child (even with only one sub-element). The index_keys operation
    takes precedence over this. It is applied after any user-supplied
    postprocessor has run.

        For example, given this input:
        <servers>
          <server>
            <name>host1</name>
            <os>Linux</os>
            <interfaces>
              <interface>
                <name>em0</name>
                <ip_address>10.0.0.1</ip_address>
              </interface>
            </interfaces>
          </server>
        </servers>

        Called with force_list=('interface',), it produces this dictionary:
        {'servers':
          {'server':
            {'name': 'host1',
             'os': 'Linux',
             'interfaces':
              {'interface':
                [{'name': 'em0', 'ip_address': '10.0.0.1'}]}}}}

        `force_list` may also be a callable receiving `path`, `key` and
        `value`, which helps when deciding whether to force a list takes
        more complex logic.

    If `process_comments` is `True`, each comment is added under
    comment_key (default=`'#comment'`) to the tag that contains it.

        For example, given this input:
        <a>
          <b>
            <!-- b comment -->
            <c>
                <!-- c comment -->
                1
            </c>
            <d>2</d>
          </b>
        </a>

        Called with process_comments=True, it produces this dictionary:
        'a': {
            'b': {
                '#comment': 'b comment',
                'c': {
                    '#comment': 'c comment',
                    '#text': '1',
                },
                'd': '2',
            },
        }
    """
    sax_handler = _DictSAXHandler(namespace_separator=namespace_separator,
                                  **kwargs)

    if isinstance(xml_input, _unicode):
        # Text must become bytes for the parser; UTF-8 is the fallback.
        encoding = encoding or 'utf-8'
        xml_input = xml_input.encode(encoding)

    # The handler keeps the separator; expat only sees it when namespaces
    # are being processed.
    expat_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, expat_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    xml_parser.StartNamespaceDeclHandler = sax_handler.startNamespaceDecl
    xml_parser.StartElementHandler = sax_handler.startElement
    xml_parser.EndElementHandler = sax_handler.endElement
    xml_parser.CharacterDataHandler = sax_handler.characters
    if process_comments:
        # Comments only reach the handler when explicitly requested.
        xml_parser.CommentHandler = sax_handler.comments
    xml_parser.buffer_text = True

    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            xml_parser._reader.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", True)
        except AttributeError:
            # For CPython / expat parser.
            def _discard(_data):
                # Anything not handled ends up here, so entities are not
                # expanded.
                return None

            def _accept_external_entity(*_args):
                # An integer is expected; zero means failure and leads to
                # expat.ExpatError.
                return 1

            xml_parser.DefaultHandler = _discard
            xml_parser.ExternalEntityRefHandler = _accept_external_entity

    if hasattr(xml_input, 'read'):
        xml_parser.ParseFile(xml_input)
    elif isgenerator(xml_input):
        # Feed the chunks one by one, then signal the end of the document
        # with an empty final chunk.
        for piece in xml_input:
            xml_parser.Parse(piece, False)
        xml_parser.Parse(b'', True)
    else:
        xml_parser.Parse(xml_input, True)
    return sax_handler.item