Example #1
0
def parse(url_or_path, encoding=None, handler_class=DrillHandler):
    """
    Parse XML from a URL, a filesystem path, a literal XML string, or a
    file-like object, building the tree via *handler_class*.

    :param url_or_path: A file-like object, a filesystem path, a URL, or a string containing XML
    :param encoding: Optional character encoding handed to the expat parser
    :param handler_class: SAX-style handler class used to build the tree
    :rtype: :class:`XmlElement`
    """
    sax_handler = handler_class()
    xml_parser = expat.ParserCreate(encoding)
    xml_parser.buffer_text = 1
    xml_parser.StartElementHandler = sax_handler.start_element
    xml_parser.EndElementHandler = sax_handler.end_element
    xml_parser.CharacterDataHandler = sax_handler.characters
    if not isinstance(url_or_path, basestring):
        # Non-string input: either raw bytes (Python 3) or a file-like object.
        if PY3 and isinstance(url_or_path, bytes):
            xml_parser.ParseFile(bytes_io(url_or_path))
        else:
            xml_parser.ParseFile(url_or_path)
        return sax_handler.root
    # String input: sniff whether it is a URL, literal XML, or a path.
    if '://' in url_or_path[:20]:
        with contextlib.closing(url_lib.urlopen(url_or_path)) as stream:
            xml_parser.ParseFile(stream)
    elif url_or_path[:100].strip().startswith('<'):
        data = url_or_path
        if isinstance(data, unicode):
            if encoding is None:
                encoding = 'utf-8'
            data = data.encode(encoding)
        xml_parser.Parse(data, True)
    else:
        with open(url_or_path, 'rb') as stream:
            xml_parser.ParseFile(stream)
    return sax_handler.root
Example #2
0
def parse(url_or_path, encoding=None, handler_class=DrillHandler):
    """
    Build an XML tree from a URL, path, XML string, or file-like object.

    :param url_or_path: A file-like object, a filesystem path, a URL, or a string containing XML
    :param encoding: Optional character encoding handed to the expat parser
    :param handler_class: SAX-style handler class used to build the tree
    :rtype: :class:`XmlElement`
    """
    handler = handler_class()
    parser = expat.ParserCreate(encoding)
    parser.buffer_text = 1
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.characters
    is_string = isinstance(url_or_path, basestring)
    if is_string and "://" in url_or_path[:20]:
        # Treat anything with a scheme near the start as a URL.
        with contextlib.closing(url_lib.urlopen(url_or_path)) as stream:
            parser.ParseFile(stream)
    elif is_string and url_or_path[:100].strip().startswith("<"):
        # Literal XML content passed directly as a string.
        xml_text = url_or_path
        if isinstance(xml_text, unicode):
            xml_text = xml_text.encode("utf-8" if encoding is None else encoding)
        parser.Parse(xml_text, True)
    elif is_string:
        # Anything else string-like is assumed to be a filesystem path.
        with open(url_or_path, "rb") as stream:
            parser.ParseFile(stream)
    elif PY3 and isinstance(url_or_path, bytes):
        parser.ParseFile(bytes_io(url_or_path))
    else:
        parser.ParseFile(url_or_path)
    return handler.root
Example #3
0
    def xml(self, **kwargs):
        """
        Serialize this node and all of its descendants to XML.

        An :class:`XmlWriter` is created internally to do the writing; any
        named arguments are forwarded to its constructor.

        :param **kwargs: Named arguments for the :class:`XmlWriter` constructor
        :returns: The serialized XML for this subtree
        """
        buf = bytes_io()
        self.write(XmlWriter(buf, **kwargs))
        return buf.getvalue()
Example #4
0
def get_pure_py_file_map(t, platform, dependencies):
    """
    Compute how to rewrite a pure-Python conda package for another platform.

    :param t: Open ``tarfile.TarFile`` for the source package
    :param platform: Destination subdir, e.g. ``'win-64'`` or ``'linux-64'``
    :param dependencies: Replacement ``depends`` list for the new metadata,
        or ``None`` to keep the original dependencies
    :returns: Dict mapping archive paths to ``None`` (drop the entry), a
        ``TarInfo`` (rename only), or a ``(TarInfo, file-like)`` tuple
        (rename plus new contents)
    """
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    try:
        paths = json.loads(
            t.extractfile('info/paths.json').read().decode('utf-8'))
        _check_paths_version(paths)
    except KeyError:
        # Older package formats ship no info/paths.json.
        paths = None
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'

    files = t.extractfile('info/files').read().decode("utf-8").splitlines()

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    if dependencies is not None:
        newinfo['depends'] = dependencies

    # Distinct python versions pinned by the dependency specs.
    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    pythons = list(set(p.group(1) for p in pythons))

    if 'python' in info['depends'] and not pythons:
        # No python version dependency was specified
        # Only a problem when converting from windows to unix, since
        # the python version is part of the folder structure on unix.
        if source_type == 'win' and dest_type == 'unix':
            # Fixed: the two literals previously joined without a space.
            raise RuntimeError(
                "Python dependency must be explicit when converting "
                "from a Windows package to a Linux package")

    if len(pythons) > 1:
        raise RuntimeError(
            "Found more than one versioned Python dependency in package %s" %
            t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    else:
        pyver = pythons[0]
        # Substitute the concrete python version into the path patterns.
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]

    members = t.getmembers()
    file_map = {}
    paths_mapping_dict = {}  # keep track of what we change in files
    pathmember = None

    # is None when info/has_prefix does not exist
    has_prefix_files = None
    if 'info/has_prefix' in t.getnames():
        has_prefix_files = t.extractfile("info/has_prefix").read().decode()
    if has_prefix_files:
        fieldnames = ['prefix', 'type', 'path']
        try:
            csv_dialect = csv.Sniffer().sniff(has_prefix_files)
        except csv.Error:
            # should be a tab-delimited file.  Not completely sure whether text editors may have
            #    replaced tabs with spaces, though
            csv_dialect = csv.excel_tab
        csv_dialect.lineterminator = '\n'
        # Normalize dialect attributes to this interpreter's native str
        # type (the csv module is picky about bytes vs. unicode).
        for attr in ('delimiter', 'quotechar'):
            if PY3 and hasattr(getattr(csv_dialect, attr), 'decode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).decode())
            elif not PY3 and hasattr(getattr(csv_dialect, attr), 'encode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).encode())
        has_prefix_files = csv.DictReader(has_prefix_files.splitlines(),
                                          fieldnames=fieldnames,
                                          dialect=csv_dialect)
        # convenience: store list of dictionaries as map by path
        has_prefix_files = {d['path']: d for d in has_prefix_files}

    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/paths.json':
            pathmember = deepcopy(member)
            continue

        # Move paths
        oldpath = member.path
        append_new_path_to_has_prefix = False
        if has_prefix_files and oldpath in has_prefix_files:
            append_new_path_to_has_prefix = True

        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                loc = files.index(oldpath)
                files[loc] = newpath
                paths_mapping_dict[oldpath] = newpath
                if append_new_path_to_has_prefix:
                    has_prefix_files[oldpath]['path'] = newpath
                break
        else:
            file_map[oldpath] = member

        # Make Windows compatible entry-points
        if source_type == 'unix' and dest_type == 'win':
            old = path_mapping_bat_proxy[0]
            for new in path_mapping_bat_proxy[1]:
                match = old.match(oldpath)
                if match:
                    newpath = old.sub(new, oldpath)
                    if newpath.endswith('-script'):
                        if match.group(2):
                            newpath = newpath + match.group(2)
                        else:
                            newpath = newpath + '.py'
                    if newpath != oldpath:
                        newmember = tarfile.TarInfo(newpath)
                        if newpath.endswith('.bat'):
                            if PY3:
                                data = bytes(BAT_PROXY.replace('\n', '\r\n'),
                                             'ascii')
                            else:
                                data = BAT_PROXY.replace('\n', '\r\n')
                        else:
                            data = t.extractfile(member).read()
                            if append_new_path_to_has_prefix:
                                has_prefix_files[oldpath]['path'] = newpath
                        newmember.size = len(data)
                        file_map[newpath] = newmember, bytes_io(data)
                        files.append(newpath)
                        if paths is not None:
                            # Mirror the extra entry-point in paths.json;
                            # guard added because older packages have no
                            # paths.json at all (paths is None).
                            found_path = [
                                p for p in paths['paths'] if p['_path'] == oldpath
                            ]
                            assert len(found_path) == 1
                            newdict = copy(found_path[0])
                            newdict['_path'] = newpath
                            paths['paths'].append(newdict)

    # Change paths.json the same way that we changed files
    if paths:
        updated_paths = _update_paths(paths, paths_mapping_dict)
        paths = json.dumps(updated_paths,
                           sort_keys=True,
                           indent=4,
                           separators=(',', ': '))
    files = list(set(files))
    files = '\n'.join(sorted(files)) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
        if paths:
            paths = bytes(paths, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)
    if pathmember:
        pathmember.size = len(paths)
        file_map['info/paths.json'] = pathmember, bytes_io(paths)
    if has_prefix_files:
        output = StringIO()
        writer = csv.DictWriter(output,
                                fieldnames=fieldnames,
                                dialect=csv_dialect)
        writer.writerows(has_prefix_files.values())
        member = t.getmember('info/has_prefix')
        output_val = output.getvalue()
        if hasattr(output_val, 'encode'):
            output_val = output_val.encode()
        member.size = len(output_val)
        file_map['info/has_prefix'] = member, bytes_io(output_val)

    return file_map
Example #5
0
def get_pure_py_file_map(t, platform):
    """
    Compute how to rewrite a pure-Python conda package for another platform.

    :param t: Open ``tarfile.TarFile`` for the source package
    :param platform: Destination subdir, e.g. ``'win-64'`` or ``'linux-64'``
    :returns: Dict mapping archive paths to ``None`` (drop the entry), a
        ``TarInfo`` (rename only), or a ``(TarInfo, file-like)`` tuple
        (rename plus new contents)
    """
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'

    files = t.extractfile('info/files').read().decode("utf-8")

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    if len(pythons) > 1:
        raise RuntimeError("Found more than one Python dependency in package %s"
            % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    elif pythons[0].group(1):
        pyver = pythons[0].group(1)

        # Substitute the concrete python version into the path patterns.
        mapping = [(re.compile(i[0].format(pyver=pyver)),
            i[1].format(pyver=pyver)) for i in mapping]
    else:
        # No python version dependency was specified
        # Only a problem when converting from windows to unix, since
        # the python version is part of the folder structure on unix.
        if source_type == 'win' and dest_type == 'unix':
            # Fixed: the two literals previously joined without a space.
            raise RuntimeError("Python dependency must be explicit when "
                               "converting from a Windows package to a "
                               "Linux package")

    members = t.getmembers()
    file_map = {}
    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/has_prefix':
            if source_type == 'unix' and dest_type == 'win':
                # has_prefix is not needed on Windows
                file_map['info/has_prefix'] = None

        # Move paths
        oldpath = member.path
        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if oldpath in file_map:
                # Already been handled
                break
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                # NOTE(review): str.replace also rewrites substrings of longer
                # paths in the file list -- confirm no archive path is a
                # prefix of another.
                files = files.replace(oldpath, newpath)
                break
        else:
            file_map[oldpath] = member

        # Make Windows compatible entry-points
        # NOTE(review): batseen is re-created for every member, so it only
        # dedupes within one member's mapping pass -- confirm intended.
        batseen = set()
        if source_type == 'unix' and dest_type == 'win':
            for old, new in path_mapping_bat_proxy:
                newpath = old.sub(new, oldpath)
                if oldpath in batseen:
                    break
                if newpath != oldpath:
                    newmember = tarfile.TarInfo(newpath)
                    if PY3:
                        data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                    else:
                        data = BAT_PROXY.replace('\n', '\r\n')
                    newmember.size = len(data)
                    file_map[newpath] = newmember, bytes_io(data)
                    batseen.add(oldpath)
                    files = files + newpath + "\n"

    files = '\n'.join(sorted(files.splitlines())) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)

    return file_map
Example #6
0
        elif url_or_path[:100].strip().startswith('<'):
            if isinstance(url_or_path, unicode):
                if encoding is None:
                    encoding = 'utf-8'
                url_or_path = url_or_path.encode(encoding)
            parser.Parse(url_or_path, True)
        else:
            f = open(url_or_path, 'rb')
            try:
                parser.ParseFile(f)
            except Exception, e:
                raise e 
            finally:
                f.close()
    elif PY3 and isinstance(url_or_path, bytes):
        parser.ParseFile(bytes_io(url_or_path))
    else:
        parser.ParseFile(url_or_path)
    return handler.root

class DrillElementIterator(object):
    """Accumulates elements produced by an incremental parse of a stream."""

    # Number of bytes fed to the parser per read.
    READ_CHUNK_SIZE = 16384

    def __init__(self, filelike, parser):
        """Remember the input stream and the expat parser driving it."""
        self.filelike = filelike
        self.parser = parser
        # Elements parsed so far, in document order.
        self.elements = []

    def add(self, element):
        """Queue *element* for later retrieval."""
        self.elements.append(element)
def get_pure_py_file_map(t, platform):
    """
    Compute how to rewrite a pure-Python conda package for another platform.

    :param t: Open ``tarfile.TarFile`` for the source package
    :param platform: Destination subdir, e.g. ``'win-64'`` or ``'linux-64'``
    :returns: Dict mapping archive paths to ``None`` (drop the entry), a
        ``TarInfo`` (rename only), or a ``(TarInfo, file-like)`` tuple
        (rename plus new contents)
    """
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'

    files = t.extractfile('info/files').read().decode("utf-8")

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    if len(pythons) > 1:
        raise RuntimeError(
            "Found more than one Python dependency in package %s" % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    elif pythons[0].group(1):
        pyver = pythons[0].group(1)

        # Substitute the concrete python version into the path patterns.
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]
    else:
        # No python version dependency was specified
        # Only a problem when converting from windows to unix, since
        # the python version is part of the folder structure on unix.
        if source_type == 'win' and dest_type == 'unix':
            # Fixed: the two literals previously joined without a space.
            raise RuntimeError(
                "Python dependency must be explicit when converting "
                "from a Windows package to a Linux package")

    members = t.getmembers()
    file_map = {}
    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/has_prefix':
            if source_type == 'unix' and dest_type == 'win':
                # has_prefix is not needed on Windows
                file_map['info/has_prefix'] = None

        # Move paths
        oldpath = member.path
        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if oldpath in file_map:
                # Already been handled
                break
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                # NOTE(review): str.replace also rewrites substrings of longer
                # paths in the file list -- confirm no archive path is a
                # prefix of another.
                files = files.replace(oldpath, newpath)
                break
        else:
            file_map[oldpath] = member

        # Make Windows compatible entry-points
        # NOTE(review): batseen is re-created for every member, so it only
        # dedupes within one member's mapping pass -- confirm intended.
        batseen = set()
        if source_type == 'unix' and dest_type == 'win':
            for old, new in path_mapping_bat_proxy:
                newpath = old.sub(new, oldpath)
                if oldpath in batseen:
                    break
                if newpath != oldpath:
                    newmember = tarfile.TarInfo(newpath)
                    if PY3:
                        data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                    else:
                        data = BAT_PROXY.replace('\n', '\r\n')
                    newmember.size = len(data)
                    file_map[newpath] = newmember, bytes_io(data)
                    batseen.add(oldpath)
                    files = files + newpath + "\n"

    files = '\n'.join(sorted(files.splitlines())) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)

    return file_map
Example #8
0
def get_pure_py_file_map(t, platform):
    """
    Compute how to rewrite a pure-Python conda package for another platform.

    :param t: Open ``tarfile.TarFile`` for the source package
    :param platform: Destination subdir, e.g. ``'win-64'`` or ``'linux-64'``
    :returns: Dict mapping archive paths to ``None`` (drop the entry), a
        ``TarInfo`` (rename only), or a ``(TarInfo, file-like)`` tuple
        (rename plus new contents)
    """
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    try:
        paths = json.loads(t.extractfile('info/paths.json').read().decode('utf-8'))
        _check_paths_version(paths)
    except KeyError:
        # Older package formats ship no info/paths.json.
        paths = None
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'

    files = t.extractfile('info/files').read().decode("utf-8").splitlines()

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    # Distinct python versions pinned by the dependency specs.
    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    pythons = list(set(p.group(1) for p in pythons))

    if 'python' in info['depends'] and not pythons:
        # No python version dependency was specified
        # Only a problem when converting from windows to unix, since
        # the python version is part of the folder structure on unix.
        if source_type == 'win' and dest_type == 'unix':
            # Fixed: the two literals previously joined without a space.
            raise RuntimeError("Python dependency must be explicit when "
                               "converting from a Windows package to a "
                               "Linux package")

    if len(pythons) > 1:
        raise RuntimeError("Found more than one versioned Python dependency in package %s"
            % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    else:
        pyver = pythons[0]
        # Substitute the concrete python version into the path patterns.
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]

    members = t.getmembers()
    file_map = {}
    paths_mapping_dict = {}  # keep track of what we change in files
    pathmember = None

    # is None when info/has_prefix does not exist
    has_prefix_files = None
    if 'info/has_prefix' in t.getnames():
        has_prefix_files = t.extractfile("info/has_prefix").read().decode()
    if has_prefix_files:
        fieldnames = ['prefix', 'type', 'path']
        try:
            csv_dialect = csv.Sniffer().sniff(has_prefix_files)
        except csv.Error:
            # should be a tab-delimited file.  Not completely sure whether text editors may have
            #    replaced tabs with spaces, though
            csv_dialect = csv.excel_tab
        csv_dialect.lineterminator = '\n'
        # Normalize dialect attributes to this interpreter's native str
        # type (the csv module is picky about bytes vs. unicode).
        for attr in ('delimiter', 'quotechar'):
            if PY3 and hasattr(getattr(csv_dialect, attr), 'decode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).decode())
            elif not PY3 and hasattr(getattr(csv_dialect, attr), 'encode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).encode())
        has_prefix_files = csv.DictReader(has_prefix_files.splitlines(), fieldnames=fieldnames,
                                          dialect=csv_dialect)
        # convenience: store list of dictionaries as map by path
        has_prefix_files = {d['path']: d for d in has_prefix_files}

    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/paths.json':
            pathmember = deepcopy(member)
            continue

        # Move paths
        oldpath = member.path
        append_new_path_to_has_prefix = False
        if has_prefix_files and oldpath in has_prefix_files:
            append_new_path_to_has_prefix = True

        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                loc = files.index(oldpath)
                files[loc] = newpath
                paths_mapping_dict[oldpath] = newpath
                if append_new_path_to_has_prefix:
                    has_prefix_files[oldpath]['path'] = newpath
                break
        else:
            file_map[oldpath] = member

        # Make Windows compatible entry-points
        if source_type == 'unix' and dest_type == 'win':
            old = path_mapping_bat_proxy[0]
            for new in path_mapping_bat_proxy[1]:
                match = old.match(oldpath)
                if match:
                    newpath = old.sub(new, oldpath)
                    if newpath.endswith('-script'):
                        if match.group(2):
                            newpath = newpath + match.group(2)
                        else:
                            newpath = newpath + '.py'
                    if newpath != oldpath:
                        newmember = tarfile.TarInfo(newpath)
                        if newpath.endswith('.bat'):
                            if PY3:
                                data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                            else:
                                data = BAT_PROXY.replace('\n', '\r\n')
                        else:
                            data = t.extractfile(member).read()
                            if append_new_path_to_has_prefix:
                                has_prefix_files[oldpath]['path'] = newpath
                        newmember.size = len(data)
                        file_map[newpath] = newmember, bytes_io(data)
                        files.append(newpath)
                        if paths is not None:
                            # Mirror the extra entry-point in paths.json;
                            # guard added because older packages have no
                            # paths.json at all (paths is None).
                            found_path = [p for p in paths['paths'] if p['_path'] == oldpath]
                            assert len(found_path) == 1
                            newdict = copy(found_path[0])
                            newdict['_path'] = newpath
                            paths['paths'].append(newdict)

    # Change paths.json the same way that we changed files
    if paths:
        updated_paths = _update_paths(paths, paths_mapping_dict)
        paths = json.dumps(updated_paths, sort_keys=True,
                           indent=4, separators=(',', ': '))
    files = list(set(files))
    files = '\n'.join(sorted(files)) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
        if paths:
            paths = bytes(paths, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)
    if pathmember:
        pathmember.size = len(paths)
        file_map['info/paths.json'] = pathmember, bytes_io(paths)
    if has_prefix_files:
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames, dialect=csv_dialect)
        writer.writerows(has_prefix_files.values())
        member = t.getmember('info/has_prefix')
        output_val = output.getvalue()
        if hasattr(output_val, 'encode'):
            output_val = output_val.encode()
        member.size = len(output_val)
        file_map['info/has_prefix'] = member, bytes_io(output_val)

    return file_map