Beispiel #1
0
def xml_parse(filename, path=()):
    """Parse *filename* with a SAX parser and return the resulting tree.

    The document is pushed through a ``parsers.XMLParser`` content handler;
    *path* is handed to ``Resolver``, which is installed as the parser's
    entity resolver.  The value returned is the ``parser.tree`` attribute
    of the content handler once parsing has finished.
    """
    content_handler = parsers.XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))
    sax_parser.setContentHandler(content_handler)
    sax_parser.parse(filename)
    return content_handler.parser.tree
def xml_parse(filename, path=()):
    """Parse *filename* through a ``file://`` URL and return the tree.

    The absolute form of *filename* is turned into a ``file://`` URL before
    being given to the SAX parser.  *path* is handed to ``Resolver``, which
    is installed as the parser's entity resolver.  The value returned is
    the ``parser.tree`` attribute of the ``parsers.XMLParser`` handler.
    """
    absolute = os.path.abspath(filename)
    source = "file://%s" % absolute
    content_handler = parsers.XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))
    sax_parser.setContentHandler(content_handler)
    sax_parser.parse(source)
    return content_handler.parser.tree
Beispiel #3
0
def create_parser(store):
    """Build a namespace-aware SAX parser that feeds a ``TriXHandler``.

    Args:
        store: passed unchanged to ``TriXHandler``.

    Returns:
        The configured parser.  Nothing is parsed here; the caller invokes
        ``parse`` on the returned object.
    """
    parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # Not every SAX driver exposes this expatreader-specific method
        # (reportedly Jython's does not); the workaround is then skipped.
        pass
    parser.setFeature(handler.feature_namespaces, 1)
    trix = TriXHandler(store)
    parser.setContentHandler(trix)
    parser.setErrorHandler(ErrorHandler())
    return parser
Beispiel #4
0
def create_parser(store):
    """Build a namespace-aware SAX parser that feeds an ``RDFXMLHandler``.

    Args:
        store: passed unchanged to ``RDFXMLHandler``.

    Returns:
        The configured parser.  Nothing is parsed here; the caller invokes
        ``parse`` on the returned object.
    """
    parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # Not every SAX driver exposes this expatreader-specific method
        # (reportedly Jython's does not); the workaround is then skipped.
        pass
    parser.setFeature(handler.feature_namespaces, 1)
    rdfxml = RDFXMLHandler(store)
    # NOTE: no document locator is attached to the handler in this function.
    parser.setContentHandler(rdfxml)
    parser.setErrorHandler(ErrorHandler())
    return parser
Beispiel #5
0
def parseSparqlResults(store, resultString):
    """Parse a SPARQL results document and return the handler's results.

    Args:
        store: passed unchanged to ``SparqlResultsHandler``.
        resultString: handed straight to ``parser.parse()``.
            NOTE(review): despite the name, SAX parsers expect a file name,
            URL, file-like object or InputSource here -- confirm what the
            callers actually pass.

    Returns:
        The ``results`` attribute of the ``SparqlResultsHandler`` after the
        whole document has been parsed.
    """
    parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # Not every SAX driver exposes this expatreader-specific method
        # (reportedly Jython's does not); the workaround is then skipped.
        pass
    parser.setFeature(handler.feature_namespaces, 1)
    sparqlResults = SparqlResultsHandler(store)
    parser.setContentHandler(sparqlResults)
    parser.setErrorHandler(ErrorHandler())
    # NOTE(review): the parser object itself is used as the locator;
    # presumably it implements the Locator methods -- confirm.
    sparqlResults.setDocumentLocator(parser)
    parser.parse(resultString)
    return sparqlResults.results
Beispiel #6
0
def xml_parse(filename, path=()):
    """Parse *filename* with a SAX parser and return the resulting tree.

    On Python 2.3 the parser is given a ``file://`` URL built from the
    absolute path; on every other version it is given *filename* as is.
    *path* is handed to ``Resolver``, which is installed as the parser's
    entity resolver.  The value returned is the ``parser.tree`` attribute
    of the ``parsers.XMLParser`` handler.
    """
    source = filename
    if sys.version_info[:2] == (2, 3):
        # XXX: this is for older versions of python
        source = "file://%s" % os.path.abspath(filename)

    content_handler = parsers.XMLParser()
    sax_parser = xml.sax.make_parser()
    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setEntityResolver(Resolver(path))
    sax_parser.setContentHandler(content_handler)
    sax_parser.parse(source)
    return content_handler.parser.tree
def create_parser(target, store):
    """Build a namespace-aware SAX parser that feeds an ``RDFXMLHandler``.

    *store* is passed to ``RDFXMLHandler``; *target* is given to the
    handler through ``setDocumentLocator``.  The configured parser is
    returned without having parsed anything.
    """
    xml_namespace = "http://www.w3.org/XML/1998/namespace"
    sax_parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        sax_parser.start_namespace_decl("xml", xml_namespace)
    except AttributeError:
        # Not present in Jython (at least)
        pass
    sax_parser.setFeature(handler.feature_namespaces, 1)

    rdf_handler = RDFXMLHandler(store)
    rdf_handler.setDocumentLocator(target)

    sax_parser.setErrorHandler(ErrorHandler())
    sax_parser.setContentHandler(rdf_handler)
    return sax_parser
Beispiel #8
0
def changesets_from_svnlog(log, repository, chunksize=2**15):
    """Incrementally parse ``svn log --xml`` output into changesets.

    This is a generator: ``log`` is read ``chunksize`` units at a time, each
    chunk is fed to a SAX parser, and every ``Changeset`` completed so far
    is yielded before the next chunk is read.

    Args:
        log: object with a ``read(size)`` method returning the XML log.
        repository: supplies ``module`` (the path prefix of the tracked
            tree) and ``log`` (used to warn about paths outside it).
        chunksize: amount of data requested from ``log`` per read.

    Yields:
        One ``Changeset`` per ``<logentry>`` element, in document order.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    from vcpx.changes import ChangesetEntry, Changeset

    def get_entry_from_path(path, module=repository.module):
        # Given the repository url of this wc, say
        #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
        # extract the "entry" portion (a relative path) from what
        # svn log --xml says, ie
        #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
        # that is to say "tests/PloneTestCase.py"

        if not module.endswith('/'):
            module = module + '/'
        if path.startswith(module):
            relative = path[len(module):]
            return relative

        # The path is outside our tracked tree...
        repository.log.warning('Ignoring %r since it is not under %r', path,
                               module)
        return None

    class SvnXMLLogHandler(ContentHandler):
        """SAX handler that turns each <logentry> into a Changeset."""

        # Map between svn action and tailor's.
        # NB: 'R', in svn parlance, means REPLACED, something other
        # system may view as a simpler ADD, taking the following as
        # the most common idiom::
        #
        #   # Rename the old file with a better name
        #   $ svn mv somefile nicer-name-scheme.py
        #
        #   # Be nice with lazy users
        #   $ echo "exec nicer-name-scheme.py" > somefile
        #
        #   # Add the wrapper with the old name
        #   $ svn add somefile
        #
        #   $ svn commit -m "Longer name for somefile"

        ACTIONSMAP = {
            'R': 'R',  # will be ChangesetEntry.ADDED
            'M': ChangesetEntry.UPDATED,
            'A': ChangesetEntry.ADDED,
            'D': ChangesetEntry.DELETED
        }

        def __init__(self):
            self.changesets = []
            self.current = None
            self.current_field = []
            # Either the bare action letter, or a tuple
            # (action, copyfrom-path, copyfrom-rev) for copied paths.
            self.current_path_action = None
            self.renamed = {}
            self.copies = []

        def startElement(self, name, attributes):
            if name == 'logentry':
                self.current = {}
                self.current['revision'] = attributes['revision']
                self.current['entries'] = []
                self.copies = []
            elif name in ['author', 'date', 'msg']:
                self.current_field = []
            elif name == 'path':
                self.current_field = []
                # "in" instead of has_key(): works on Python 2 and 3 alike.
                if 'copyfrom-path' in attributes:
                    self.current_path_action = (attributes['action'],
                                                attributes['copyfrom-path'],
                                                attributes['copyfrom-rev'])
                else:
                    self.current_path_action = attributes['action']

        def endElement(self, name):
            if name == 'logentry':
                # Sort the paths to make tests easier.  A key function
                # replaces the cmp-style comparator, which Python 3 rejects;
                # the resulting (stable) order is the same.
                self.current['entries'].sort(key=lambda entry: entry.name)

                # Eliminate "useless" entries: SVN does not have atomic
                # renames, but rather uses a ADD+RM duo.
                #
                # So cycle over all entries of this patch, discarding
                # the deletion of files that were actually renamed, and
                # at the same time change related entry from ADDED to
                # RENAMED.

                # When copying a directory from another location in the
                # repository (outside the tracked tree), SVN will report files
                # below this dir that are not being committed as being
                # removed.

                # We thus need to change the action_kind for all entries
                # that are below a dir that was "copyfrom" from a path
                # outside of this module:
                #  D -> Remove entry completely (it's not going to be in here)
                #  (M,A,R) -> A

                mv_or_cp = {}
                for e in self.current['entries']:
                    if e.action_kind == e.ADDED and e.old_name is not None:
                        mv_or_cp[e.old_name] = e

                def parent_was_copied(n):
                    for p in self.copies:
                        if n.startswith(p + '/'):
                            return True
                    return False

                # Find renames from deleted directories:
                # $ svn mv dir/a.txt a.txt
                # $ svn del dir
                def check_renames_from_dir(name):
                    for e in mv_or_cp.values():
                        if e.old_name.startswith(name + '/'):
                            e.action_kind = e.RENAMED

                entries = []
                entries2 = []
                for e in self.current['entries']:
                    if e.action_kind == e.DELETED:
                        if e.name in mv_or_cp:
                            mv_or_cp[e.name].action_kind = e.RENAMED
                        else:
                            check_renames_from_dir(e.name)
                            entries2.append(e)
                    elif e.action_kind == 'R':
                        # In svn parlance, 'R' means Replaced: a typical
                        # scenario is
                        #   $ svn mv a.txt b.txt
                        #   $ touch a.txt
                        #   $ svn add a.txt
                        if e.name in mv_or_cp:
                            mv_or_cp[e.name].action_kind = e.RENAMED
                        else:
                            check_renames_from_dir(e.name)
                        e.action_kind = e.ADDED
                        entries2.append(e)
                    elif parent_was_copied(e.name):
                        if e.action_kind != e.DELETED:
                            e.action_kind = e.ADDED
                            entries.append(e)
                    else:
                        entries.append(e)

                # Changes sort: first MODIFY|ADD|RENAME, than REPLACE|DELETE
                entries.extend(entries2)

                svndate = self.current['date']
                # 2004-04-16T17:12:48.000000Z
                y, m, d = map(int, svndate[:10].split('-'))
                hh, mm, ss = map(int, svndate[11:19].split(':'))
                # Fractional part between the '.' and the trailing 'Z',
                # passed to datetime() as microseconds.
                ms = int(svndate[20:-1])
                timestamp = datetime(y, m, d, hh, mm, ss, ms, UTC)

                changeset = Changeset(self.current['revision'], timestamp,
                                      self.current.get('author'),
                                      self.current['msg'], entries)
                self.changesets.append(changeset)
                self.current = None
            elif name in ['author', 'date', 'msg']:
                self.current[name] = ''.join(self.current_field)
            elif name == 'path':
                path = ''.join(self.current_field)
                entrypath = get_entry_from_path(path)
                if entrypath:
                    entry = ChangesetEntry(entrypath)

                    # A tuple means the path carried copyfrom information.
                    if isinstance(self.current_path_action, tuple):
                        self.copies.append(entry.name)
                        old = get_entry_from_path(self.current_path_action[1])
                        if old:
                            entry.action_kind = self.ACTIONSMAP[
                                self.current_path_action[0]]
                            entry.old_name = old
                            self.renamed[entry.old_name] = True
                        else:
                            # Copied from outside the tracked tree.
                            entry.action_kind = entry.ADDED
                    else:
                        entry.action_kind = self.ACTIONSMAP[
                            self.current_path_action]

                    self.current['entries'].append(entry)

        def characters(self, data):
            # Text may arrive in several pieces; they are joined in
            # endElement.
            self.current_field.append(data)

    parser = make_parser()
    handler = SvnXMLLogHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    chunk = log.read(chunksize)
    while chunk:
        parser.feed(chunk)
        # Hand out whatever was completed by this chunk, then reset the
        # list so the same changesets are not yielded twice.
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = log.read(chunksize)
    parser.close()
    for cs in handler.changesets:
        yield cs
Beispiel #9
0
 def parse(self, input, errorHandler=ErrorHandler()):
     """Run ``xml.sax.parse`` over *input* with this object as the content
     handler and *errorHandler* as the error handler."""
     xml.sax.parse(input, handler=self, errorHandler=errorHandler)
Beispiel #10
0
def changesets_from_darcschanges_unsafe(changes, unidiff=False, repodir=None,
                                        chunksize=2**15, replace_badchars=None):
    """
    Do the real work of parsing the change log, including tags.
    Warning: the tag information in the changesets returned by this
    function are only correct if each darcs tag in the repo depends on
    all of the patches that precede it.  This is not a valid
    assumption in general--a tag that does not depend on patch P can
    be pulled in from another darcs repo after P.  We collect the tag
    info anyway because DarcsWorkingDir._currentTags() can use it
    safely despite this problem.  Hopefully the problem will
    eventually be fixed and this function can be renamed
    changesets_from_darcschanges.

    This is a generator.  ``changes`` is read ``chunksize`` units at a
    time; each chunk is passed through ``replace_badchars`` (a mapping
    from single characters to their replacement, or a false value to
    disable the substitution) and fed to a SAX parser.  One
    ``DarcsChangeset`` is yielded per ``<patch>`` element.

    When both ``unidiff`` and ``repodir`` are true, ``darcs diff`` is run
    for every patch and its output stored in the changeset's ``unidiff``
    attribute.
    """
    from xml.sax import make_parser
    from xml.sax.handler import ContentHandler, ErrorHandler
    from datetime import datetime
    # Imported once here rather than on every <patch> element.
    from time import strptime

    class DarcsXMLChangesHandler(ContentHandler):
        """SAX handler that turns each <patch> into a DarcsChangeset."""

        def __init__(self):
            self.changesets = []
            self.current = None
            self.current_field = []
            if unidiff and repodir:
                cmd = ["darcs", "diff", "--unified", "--repodir", repodir,
                       "--patch", "%(patchname)s"]
                self.darcsdiff = ExternalCommand(command=cmd)
            else:
                self.darcsdiff = None

        def startElement(self, name, attributes):
            if name == 'patch':
                self.current = {}
                self.current['author'] = attributes['author']
                date = attributes['date']
                try:
                    # 20040619130027
                    timestamp = datetime(*strptime(date, '%Y%m%d%H%M%S')[:6])
                except ValueError:
                    # Old darcs patches use the form Sun Oct 20 20:01:05 EDT 2002
                    # The timezone name is cut out before parsing.
                    timestamp = datetime(*strptime(date[:19] + date[-5:], '%a %b %d %H:%M:%S %Y')[:6])

                timestamp = timestamp.replace(tzinfo=UTC) # not true for the ValueError case, but oh well

                self.current['date'] = timestamp
                self.current['comment'] = ''
                self.current['hash'] = attributes['hash']
                self.current['entries'] = []
                self.inverted = (attributes['inverted'] == "True")
            elif name in ['name', 'comment', 'add_file', 'add_directory',
                          'modify_file', 'remove_file', 'remove_directory']:
                self.current_field = []
            elif name == 'move':
                self.old_name = attributes['from']
                self.new_name = attributes['to']

        def endElement(self, name):
            if name == 'patch':
                cset = DarcsChangeset(self.current['name'],
                                      self.current['date'],
                                      self.current['author'],
                                      self.current['comment'],
                                      self.current['entries'],
                                      tags=self.current.get('tags',[]),
                                      darcs_hash=self.current['hash'])
                if self.darcsdiff:
                    cset.unidiff = self.darcsdiff.execute(TZ='UTC',
                        stdout=PIPE, patchname=cset.revision)[0].read()

                self.changesets.append(cset)
                self.current = None
            elif name in ['name', 'comment']:
                val = ''.join(self.current_field)
                if val[:4] == 'TAG ':
                    self.current.setdefault('tags',[]).append(val[4:])
                self.current[name] = val
            elif name == 'move':
                entry = ChangesetEntry(self.new_name)
                entry.action_kind = entry.RENAMED
                entry.old_name = self.old_name
                self.current['entries'].append(entry)
            elif name in ['add_file', 'add_directory', 'modify_file',
                          'remove_file', 'remove_directory']:
                current_field = ''.join(self.current_field).strip()
                if self.inverted:
                    # the filenames in file modifications are outdated
                    # if there are renames
                    for i in self.current['entries']:
                        if i.action_kind == i.RENAMED and current_field.startswith(i.old_name):
                            # Rewrite only the leading occurrence matched by
                            # startswith(); an unlimited replace() would also
                            # mangle later occurrences of old_name inside
                            # the path.
                            current_field = current_field.replace(i.old_name, i.name, 1)
                entry = ChangesetEntry(current_field)
                entry.action_kind = { 'add_file': entry.ADDED,
                                      'add_directory': entry.ADDED,
                                      'modify_file': entry.UPDATED,
                                      'remove_file': entry.DELETED,
                                      'remove_directory': entry.DELETED
                                    }[name]
                entry.is_directory = name.endswith('directory')
                self.current['entries'].append(entry)

        def characters(self, data):
            # Text may arrive in several pieces; they are joined in
            # endElement.
            self.current_field.append(data)

    parser = make_parser()
    handler = DarcsXMLChangesHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(ErrorHandler())

    def fixup_badchars(s, charmap):
        # Substitute every character of s that has an entry in charmap;
        # a false charmap leaves s untouched.
        if not charmap:
            return s

        ret = [charmap.get(c, c) for c in s]
        return "".join(ret)

    chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    while chunk:
        parser.feed(chunk)
        # Hand out whatever was completed by this chunk, then reset the
        # list so the same changesets are not yielded twice.
        for cs in handler.changesets:
            yield cs
        handler.changesets = []
        chunk = fixup_badchars(changes.read(chunksize), replace_badchars)
    parser.close()
    for cs in handler.changesets:
        yield cs
Beispiel #11
0
            textMethod(content)
        else:
            endMethod = self.__getEndMethod()
            if endMethod:
                endMethod()
            self.__pop()
        self.__content = ''

    def characters(self, content: str) -> None:
        '''Accumulate character data for the current node.

        Raises ParseError when `content` is non-empty, not purely
        whitespace, and the `__text` flag of this object is false.
        '''
        if content:
            if not self.__text:
                if not content.isspace():
                    message = (f'node {self.__getContext()} is not supposed to '
                               f'contain text but contains "{content}"')
                    raise ParseError(message)
        self.__content += content


# Module-level ErrorHandler instance, created once at import time.
_errorHandler = ErrorHandler()

# Module-level str -> str mapping, initially empty.
# NOTE(review): the name suggests a string-interning cache -- confirm
# against the code that reads and writes it.
_interningDict: Dict[str, str] = {}

# XML tag class:


class XMLTag(ABC):
    tagName: ClassVar[str] = abstract
    boolProperties: ClassVar[Sequence[str]] = ()
    intProperties: ClassVar[Sequence[str]] = ()
    enumProperties: ClassVar[Mapping[str, Type[Enum]]] = {}

    @classmethod
    def _findDeclarations(cls, name: str) -> Iterator[Any]:
        '''Yields declarations with the given `name` in this class