Example #1
0
def as_entry(infos):
    """
    Build an Entry from `infos`, a mapping of the attributes that 7zip listed
    for a single path, and return it.

    Raise ExtractWarningIncorrectEntry when `infos` carries both a symbolic
    link target and a hard link target.
    """
    entry = extractcode.Entry()

    # Attributes copied straight from the listing: (attribute, key, default).
    leading_fields = (
        ('path', 'Path', None),
        ('size', 'Size', 0),
        ('packed_size', 'Packed Size', 0),
        ('date', 'Modified', 0),
    )
    for attribute, key, default in leading_fields:
        setattr(entry, attribute, infos.get(key, default))

    # A directory is flagged with a "+" in the Folder attribute.
    entry.is_dir = infos.get('Folder', False) == '+'
    entry.is_file = not entry.is_dir
    entry.is_broken_link = False
    entry.mode = infos.get('Mode', '')
    entry.user = infos.get('User')
    entry.group = infos.get('Group')
    entry.is_special = False
    entry.is_hardlink = False

    symlink_target = infos.get('Symbolic Link')
    if symlink_target:
        entry.is_symlink = True
        entry.link_target = symlink_target

    hardlink_target = infos.get('Hard Link')
    if hardlink_target:
        entry.is_hardlink = True
        entry.link_target = hardlink_target

    if symlink_target and hardlink_target:
        raise ExtractWarningIncorrectEntry('A Symlink cannot be a hardlink too')

    trailing_fields = (
        ('linkcount', 'Links', 0),
        ('host', 'Host OS', None),
        ('comment', 'Comment', None),
        ('encrypted', 'Encrypted', None),
    )
    for attribute, key, default in trailing_fields:
        setattr(entry, attribute, infos.get(key, default))

    return entry
Example #2
0
    def from_dict(cls, infos, errors=None):
        """
        Build and return an instance of `cls` from `infos`, a mapping of the
        attributes that 7zip listed for a single path. `errors` is an optional
        list passed to the constructor; an empty list is used when it is not
        provided.

        Raise ExtractWarningIncorrectEntry when `infos` carries both a
        symbolic link target and a hard link target.
        """
        symlink_target = infos.get('Symbolic Link')
        hardlink_target = infos.get('Hard Link')

        if symlink_target and hardlink_target:
            from pprint import pformat
            raise ExtractWarningIncorrectEntry(
                'A symlink cannot be also a hardlink: {}'.format(
                    pformat(infos)))

        # At most one of the two targets is set at this point.
        link_target = hardlink_target or symlink_target or None

        # Depending on the type of archive, the file vs. directory flag lives
        # in a different attribute. Check each in turn and stop at the first
        # one that says "directory".
        if infos.get('Mode', '').lower().startswith('d'):
            # some listings have this: Mode = drwxrwxr-x
            is_dir = True
        elif infos.get('Folder', '').startswith('+'):
            # cpio and a few more have a Folder attribute
            is_dir = True
        else:
            # 7z listings have this: Attributes = D_ drwxrwxr-x
            is_dir = infos.get('Attributes', '').lower().startswith('d_')

        return cls(
            path=infos.get('Path'),
            size=infos.get('Size', 0),
            date=infos.get('Modified', None),
            is_dir=is_dir,
            is_file=not is_dir,
            is_symlink=bool(symlink_target),
            is_hardlink=bool(hardlink_target),
            link_target=link_target,
            errors=errors or [],
        )
Example #3
0
def parse_7z_listing(location, utf=False):
    """
    Parse a long format 7zip listing stored in the file at `location` and
    return a list of entries, one per archive member. Return an empty list
    when the listing contains only a header and no members.

    If `utf` is True, the listing is read as UTF-8 text and Windows line
    endings are normalized. Otherwise the raw file content is used as-is.

    Raise ExtractWarningIncorrectEntry if the listing has more than one
    header or footer separator, or has a member line that is neither a
    key = value pair nor part of an error message.
    Raise AssertionError if a member repeats the same key.

    The 7zip -slt format is:
    - copyright and version details
    - '--' line
        - archive header info, varying based on the archive types and subtype
              - lines of key=value pairs
              - Errors: followed by one or more message lines
              - Warnings: followed by one or more message lines
              - Open Warning: : followed by one or more message lines
        - sometimes a '---' line
    - blank line
    - '----------' line
    - for each archive member:
      - lines of either
          - key = value pairs
          - Errors: followed by one or more message lines
          - Warnings: followed by one or more message lines
          - Open Warning: : followed by one or more message lines
      - blank line
    - two blank lines
    - footer sometimes with lines with summary stats
        such as Warnings: 1 Errors: 1
    - a line with two or more dashes or an empty line
    """
    # Use context managers so that the listing file is always closed.
    if utf:
        with codecs.open(location, encoding='UTF-8') as listing:
            text = listing.read()
        text = text.replace(u'\r\n', u'\n')
    else:
        # NOTE(review): on Python 3 this yields bytes, which the text
        # patterns used below cannot split; confirm the supported versions.
        with open(location, 'rb') as listing:
            text = listing.read()

    header_tail = re.split('\n----------\n', text, flags=re.MULTILINE)
    if len(header_tail) == 1:
        # we have only a header, likely an error condition or an empty archive
        return []

    if len(header_tail) > 2:
        # we have more than one header, confusion entails.
        raise ExtractWarningIncorrectEntry('Incorrect 7zip listing with multiple headers')

    _header, body = header_tail

    body_and_footer = re.split('\n\n\n', body, flags=re.MULTILINE)
    if len(body_and_footer) > 2:
        raise ExtractWarningIncorrectEntry('Incorrect 7zip listing with multiple footers')

    # Keep only the members; the optional footer (second item) is dropped so
    # that its summary lines are not parsed as an archive member.
    body = body_and_footer[0]

    # FIXME: do something with header and footer?

    entries = []
    paths = re.split('\n\n', body, flags=re.MULTILINE)
    for path in paths:
        # True while we are inside a multi-line error or warning message
        is_err = False
        # FIXME: errors are collected but not yet attached to the entry
        errors = []
        infos = {}
        lines = path.splitlines(False)
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(('Open Warning:', 'Errors:', 'Warnings:')):
                is_err = True
                messages = line.split(':', 1)
                errors.append(messages)
                continue
            if '=' not in line and is_err:
                # not a key = value line, an error message
                errors.append(line)
                continue
            parts = line.split('=', 1)
            if len(parts) != 2:
                raise ExtractWarningIncorrectEntry('Incorrect 7zip listing line with no key=value')
            is_err = False
            key, value = parts
            # Strip before checking for duplicates: keys are stored stripped,
            # and a "Key = value" line yields a key with a trailing space.
            key = key.strip()
            assert key not in infos, 'Duplicate keys in 7zip listing'
            infos[key] = value.strip() or ''
        if infos:
            entries.append(as_entry(infos))

    return entries
Example #4
0
def parse_7z_listing(location, utf=False):
    """
    Parse a long format 7zip listing stored in the file at `location` and
    return a list of entries, one per archive member. Return an empty list
    when the listing contains only a header and no members.

    If `utf` is True or if on Python 3, the console output will be treated as
    utf-8-encoded text. Otherwise it is treated as bytes.

    Raise ExtractWarningIncorrectEntry if the listing has more than one
    header or footer separator, or has a member line that is neither a
    key = value pair nor part of an error message.
    Raise AssertionError if a member repeats the same key.

    The 7zip -slt format is:
    - copyright and version details
    - '--' line
        - archive header info, varying based on the archive types and subtype
              - lines of key=value pairs
              - Errors: followed by one or more message lines
              - Warnings: followed by one or more message lines
              - Open Warning: : followed by one or more message lines
        - sometimes a '---' line
    - blank line
    - '----------' line
    - for each archive member:
      - lines of either
          - key = value pairs
          - Errors: followed by one or more message lines
          - Warnings: followed by one or more message lines
          - Open Warning: : followed by one or more message lines
      - blank line
    - two blank lines
    - footer sometimes with lines with summary stats
        such as Warnings: 1 Errors: 1
    - a line with two or more dashes or an empty line
    """

    # The separators must have the same type (text or bytes) as the listing
    # content, so they are selected together with the way the file is read.
    if utf or py3:
        # read to unicode
        with io.open(location, 'r', encoding='utf-8') as listing:
            text = listing.read()
        text = text.replace(u'\r\n', u'\n')

        header_sep = u'\n----------\n'
        body_sep = u'\n\n\n'
        path_sep = u'\n\n'
        msg_sep = u':'
        equal_sep = u'='
        empty = u''
        error_line_starters = 'Open Warning:', 'Errors:', 'Warnings:'

    else:
        # read to bytes
        with io.open(location, 'rb') as listing:
            text = listing.read()

        header_sep = b'\n----------\n'
        body_sep = b'\n\n\n'
        path_sep = b'\n\n'
        msg_sep = b':'
        equal_sep = b'='
        empty = b''
        error_line_starters = b'Open Warning:', b'Errors:', b'Warnings:'

    if TRACE:
        logger.debug('parse_7z_listing: initial text: type: ' +
                     repr(type(text)))
        print('--------------------------------------')
        print(text)
        print('--------------------------------------')

    header_tail = re.split(header_sep, text, flags=re.MULTILINE)
    if len(header_tail) == 1:
        # we have only a header, likely an error condition or an empty archive
        return []

    if len(header_tail) > 2:
        # we have more than one header, confusion entails.
        raise ExtractWarningIncorrectEntry(
            'Incorrect 7zip listing with multiple headers')

    # FIXME: do something with header and footer?
    _header, body = header_tail

    body_and_footer = re.split(body_sep, body, flags=re.MULTILINE)
    if len(body_and_footer) > 2:
        raise ExtractWarningIncorrectEntry(
            'Incorrect 7zip listing with multiple footers')

    # Keep only the members; the optional footer (second item) is dropped so
    # that its summary lines are not parsed as an archive member.
    body = body_and_footer[0]

    entries = []

    if TRACE:
        logger.debug('parse_7z_listing: body:')
        print(body)

    paths = re.split(path_sep, body, flags=re.MULTILINE)

    if TRACE:
        logger.debug('parse_7z_listing: paths:')
        pprint(paths)

    for path in paths:
        # True while we are inside a multi-line error or warning message
        is_err = False
        # FIXME: errors are collected but not yet attached to the entry
        errors = []
        infos = {}
        lines = path.splitlines(False)
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(error_line_starters):
                is_err = True
                messages = line.split(msg_sep, 1)
                errors.append(messages)
                continue
            if equal_sep not in line and is_err:
                # not a key = value line, an error message
                errors.append(line)
                continue
            parts = line.split(equal_sep, 1)
            if len(parts) != 2:
                raise ExtractWarningIncorrectEntry(
                    'Incorrect 7zip listing line with no key=value')
            is_err = False
            key, value = parts
            # Strip before checking for duplicates: keys are stored stripped,
            # and a "Key = value" line yields a key with a trailing space.
            key = key.strip()
            assert key not in infos, 'Duplicate keys in 7zip listing'
            infos[key] = value.strip() or empty
        if infos:
            entries.append(as_entry(infos))

    return entries