コード例 #1
0
ファイル: scanner.py プロジェクト: streeter/flanker
    def setup(token):
        """Tell whether a pre-scanned token is genuine, normalizing boundaries.

        Content-type tokens are always accepted. For a boundary token, the
        first two characters of ``token.value`` are dropped (presumably the
        leading ``--`` of a MIME delimiter -- confirm against the tokenizer)
        and the rest is looked up in the enclosing ``boundaries`` collection:

        * an exact match marks the token as a non-final boundary;
        * a match after ``strip_endings`` marks it as a final boundary.

        In both cases ``token.value`` is rewritten to the matched value and
        ``token.final`` is set accordingly.

        Returns True when the token should be kept, False for a boundary
        that is not listed in ``boundaries``.

        Raises DecodingError when the token is neither a content type nor
        a boundary.
        """
        if token.is_content_type():
            return True

        if not token.is_boundary():
            raise DecodingError("Unknown token")

        value = token.value[2:]
        if value in boundaries:
            token.value = value
            token.final = False
            return True

        stripped = strip_endings(value)
        if stripped in boundaries:
            token.value = stripped
            token.final = True
            return True

        # false boundary
        return False
コード例 #2
0
ファイル: scanner.py プロジェクト: streeter/flanker
def scan(string):
    """Scan the entire message in a single pass and build a message tree.

    ``string`` must be a ``str`` instance (the error message calls this a
    byte string); anything else is rejected with DecodingError.

    Returns whatever ``traverse`` produces for the token stream.

    Raises DecodingError for a non-``str`` input, for any DecodingError
    raised while traversing, and for any other exception raised while
    traversing (reported as a malformed message).
    """
    if not isinstance(string, str):
        raise DecodingError("Scanner works with byte strings only")

    # A message that yields no tokens at all is handled as a single part
    # carrying the default content type.
    tokens = tokenize(string) or [default_content_type()]

    try:
        return traverse(Start(), TokensIterator(tokens, string))
    except DecodingError:
        # Already the error type callers expect; let it through untouched.
        raise
    except Exception:
        # Any unexpected failure in the parser is reported uniformly.
        raise DecodingError("Malformed MIME message")
コード例 #3
0
def parse_header(header):
    """Parse a raw header into a ``(name, parsed_value)`` pair.

    The raw header (name, colon and newlines included) is split in two
    with ``split2``. The value part is unfolded and handed to
    ``parse_header_value`` together with the name.

    Raises DecodingError when the header name is not pure ASCII.
    """
    name, raw_value = split2(header)
    if is_pure_ascii(name):
        unfolded = encodedword.unfold(raw_value)
        return name, parse_header_value(name, unfolded)
    raise DecodingError("Non-ascii header name")
コード例 #4
0
 def check(self):
     """Guard the scanner against dead loops.

     Every call counts as one operation. Once the running total in
     ``self.opcount`` exceeds ``_MAX_OPS`` a DecodingError is raised.
     """
     self.opcount += 1
     if self.opcount > _MAX_OPS:
         message = "Too many parts: {0}, max is {1}".format(
             self.opcount, _MAX_OPS)
         raise DecodingError(message)
コード例 #5
0
def decode_body(content_type, content_encoding, body):
    """Decode a part body: transfer encoding first, then charset.

    Any exception raised while undoing the transfer encoding is replaced
    by a DecodingError. Exceptions from the charset step are not
    intercepted here.

    Returns the result of ``decode_charset`` applied to the
    transfer-decoded body.
    """
    try:
        transfer_decoded = decode_transfer_encoding(content_encoding, body)
    except Exception:
        raise DecodingError("Failed to decode body")

    return decode_charset(content_type, transfer_decoded)
コード例 #6
0
def scan(string):
    """Scan the entire message in a single pass and build a message tree.

    On Python 2 the input must be binary (``six.binary_type``); anything
    else is rejected. On Python 3 binary input is decoded as UTF-8 and
    other input is used as is.

    Returns whatever ``traverse`` produces for the token stream.

    Raises DecodingError for non-binary input on Python 2, for any
    DecodingError raised while traversing, and for any other exception
    raised while traversing (chained to the original as its cause).
    """
    if six.PY2:
        if not isinstance(string, six.binary_type):
            raise DecodingError('Scanner works with binary only')
    elif isinstance(string, six.binary_type):
        string = string.decode('utf-8')

    # A message that yields no tokens at all is handled as a single part
    # carrying the default content type.
    tokens = tokenize(string) or [default_content_type()]

    try:
        return traverse(Start(), TokensIterator(tokens, string))
    except DecodingError:
        # Already the error type callers expect; let it through untouched.
        raise
    except Exception as cause:
        # six.raise_from raises by itself and never returns, so it is
        # called as a statement rather than wrapped in another ``raise``.
        six.raise_from(DecodingError("Malformed MIME message"), cause)
コード例 #7
0
ファイル: parsing.py プロジェクト: ubi-mirrors/flanker
def parse_header_value(name, val):
    """Convert a raw header value into its parsed representation.

    A value that is not pure ASCII is first converted with ``to_unicode``.
    If ``parametrized.is_parametrized`` accepts the header, the value is
    decoded into a main value plus parameters and returned as a
    ``ContentType`` (for the ``Content-Type`` header) or a ``WithParams``.
    Any other header value is returned as is.

    Raises DecodingError when the decoded main value of a parametrized
    header is not None and not pure ASCII.
    """
    if not is_pure_ascii(val):
        val = to_unicode(val)

    if not parametrized.is_parametrized(name, val):
        return val

    value, params = parametrized.decode(val)
    if value is not None and not is_pure_ascii(value):
        raise DecodingError('Non-ascii content header value')

    if name == 'Content-Type':
        main_type, sub_type = parametrized.fix_content_type(value)
        return ContentType(main_type, sub_type, params)

    return WithParams(value, params)
コード例 #8
0
def parse_header_value(name, val):
    """Convert a raw header value into its parsed representation.

    Non-ASCII values are returned through ``to_unicode``, unless the header
    is a parametrized one, which is rejected. Pure-ASCII parametrized
    headers are decoded into a ``ContentType`` (for ``Content-Type``) or a
    ``WithParams``; every other pure-ASCII value is returned unchanged.

    Raises DecodingError for a non-ASCII value in a parametrized header.
    """
    if not is_pure_ascii(val):
        if parametrized.is_parametrized(name, val):
            raise DecodingError("Unsupported value in content- header")
        return to_unicode(val)

    if not parametrized.is_parametrized(name, val):
        return val

    value, params = parametrized.decode(val)
    if name == 'Content-Type':
        main_type, sub_type = parametrized.fix_content_type(value)
        return ContentType(main_type, sub_type, params)

    return WithParams(value, params)
コード例 #9
0
def scan(string):
    """Scan the entire message in a single pass and build a message tree.

    On Python 2 the input must be binary (``six.binary_type``) or an
    ``mmap`` instance; anything else is rejected. On Python 3 binary input
    is decoded as UTF-8 and other input is used as is.

    Returns whatever ``traverse`` produces for the token stream. Errors
    raised while traversing are not intercepted here.

    Raises DecodingError for unsupported input on Python 2.
    """
    if six.PY2:
        if not isinstance(string, (six.binary_type, mmap)):
            raise DecodingError('Scanner works with binary only')
    elif isinstance(string, six.binary_type):
        string = string.decode('utf-8')

    # A message that yields no tokens at all is handled as a single part
    # carrying the default content type.
    tokens = tokenize(string) or [default_content_type()]

    return traverse(Start(), TokensIterator(tokens, string))
コード例 #10
0
ファイル: parsing.py プロジェクト: ubi-mirrors/flanker
def _read_header_lines(fp):
    """Collect header lines from *fp* up to the start of the body.

    Reading stops at the first empty line, or at the first line that does
    not match ``_RE_HEADER``; in the latter case the file position is moved
    back so that the line is read again as part of the body.

    Returns a deque with the header lines in the order they were read.

    Raises DecodingError when a line is longer than ``_MAX_LINE_LENGTH``.
    """
    header_lines = deque()
    for raw_line in fp:
        line_length = len(raw_line)
        if line_length > _MAX_LINE_LENGTH:
            raise DecodingError('Line is too long: %d' % line_length)

        if is_empty(raw_line):
            break

        if _RE_HEADER.match(raw_line):
            header_lines.append(raw_line)
            continue

        # Neither a header nor an empty line: usually the sender forgot the
        # blank separator between headers and body. "Unread" the line by
        # rewinding over it so it is treated as the beginning of the body.
        fp.seek(fp.tell() - line_length)
        break

    return header_lines
コード例 #11
0
def _filter_false_tokens(tokens):
    """
    Traverses a list of pre-scanned tokens and removes false content-type
    and boundary tokens.

    A content-type header is false unless it is the first content-type header
    in a message/part headers section.

    A boundary token is false if it has not been mentioned in a preceding
    content-type header.

    The function walks the tokens once while tracking which section of the
    message it is in (headers, body, multipart preamble or multipart
    epilogue). Empty-line tokens drive the section changes and are never
    included in the result.

    Boundary tokens that are kept are modified in place: the first two
    characters of ``value`` are dropped (and, for a final boundary, the
    result of ``_strip_endings`` is stored), and ``final`` is set.

    Returns a new list with the surviving tokens in their original order.

    Raises DecodingError if a token is neither a ContentType, a Boundary nor
    the empty-line marker.
    """
    current_section = _SECTION_HEADERS
    current_content_type = None
    filtered = []
    # Every boundary value announced so far; entries are only ever added, so
    # a boundary stays known for the rest of the token list.
    boundaries = []
    for token in tokens:
        if isinstance(token, ContentType):
            # Only the first content-type header in a headers section is valid.
            if current_content_type or current_section != _SECTION_HEADERS:
                continue

            current_content_type = token
            # Whatever get_boundary() returns is recorded, for any content
            # type, not only multipart ones.
            boundaries.append(token.get_boundary())

        elif isinstance(token, Boundary):
            # Drop the first two characters before comparing against the
            # announced boundaries (presumably the leading "--" -- confirm
            # against the tokenizer).
            value = token.value[2:]

            if value in boundaries:
                # Opening/intermediate boundary: a new part starts, so a new
                # headers section begins and no content type is known yet.
                token.value = value
                token.final = False
                current_section = _SECTION_HEADERS
                current_content_type = None

            elif _strip_endings(value) in boundaries:
                # Closing boundary: what follows is the multipart epilogue.
                token.value = _strip_endings(value)
                token.final = True
                current_section = _SECTION_MULTIPART_EPILOGUE

            else:
                # False boundary detected!
                continue

        elif token == _EMPTY_LINE:
            # An empty line only matters when it terminates a headers
            # section; in any other section it is simply dropped.
            if current_section == _SECTION_HEADERS:
                if not current_content_type:
                    # No content-type header was seen in this section.
                    current_content_type = _DEFAULT_CONTENT_TYPE

                if current_content_type.is_singlepart():
                    current_section = _SECTION_BODY
                elif current_content_type.is_multipart():
                    current_section = _SECTION_MULTIPART_PREAMBLE
                else:
                    # Start of an enclosed message or just its headers.
                    current_section = _SECTION_HEADERS
                    current_content_type = None

            # Cast away empty line tokens, for they have been pre-scanned just
            # to identify a place where a header section completes and a body
            # section starts.
            continue

        else:
            raise DecodingError("Unknown token")

        # Reached only by genuine content-type and boundary tokens.
        filtered.append(token)

    return filtered
コード例 #12
0
def traverse(pointer, iterator, parent=None, allow_bad_mime=False):
    """Recursive-descent parser that turns the token stream into parts.

    One call consumes the tokens of a single part and returns the result of
    ``make_part`` for it, recursing for the children of multipart parts and
    for the message enclosed in a message container.

    pointer -- token passed to ``make_part`` as the ``start`` of the part.
    iterator -- token iterator; this function uses its ``check()``,
        ``next()`` and ``current()`` methods.
    parent -- content-type token of the enclosing part, or None.
    allow_bad_mime -- when true and ``parent`` is a message container, a
        multipart part whose first boundary is missing makes this function
        return None instead of raising.

    Raises DecodingError for a multipart part without a boundary parameter
    or without a starting boundary, and for a delivery-status part whose
    parent is not multipart. ``iterator.check()`` may raise as well.

    NOTE(review): in the code visible here a token matching none of the
    branches makes the function fall through and return None implicitly.
    """

    # Count this step against the iterator's operation limit before
    # consuming the next token.
    iterator.check()
    token = iterator.next()

    # this means that this part does not have any
    # content type set, so set it to RFC default (text/plain)
    # it even can have no headers
    if token.is_end() or token.is_boundary():

        return make_part(content_type=default_content_type(),
                         start=pointer,
                         end=token,
                         iterator=iterator,
                         parent=parent)

    # this part tells us that it is singlepart
    # so we should ignore all other content-type headers
    # until the boundary or the end of message
    if token.is_singlepart():

        # Skip content-type tokens; the first token of any other kind
        # becomes the end of this part.
        while True:
            iterator.check()
            end = iterator.next()
            if not end.is_content_type():
                break

        return make_part(content_type=token,
                         start=pointer,
                         end=end,
                         iterator=iterator,
                         parent=parent)

    # good old multipart message
    # here goes the real recursion
    # we scan part by part until the end
    elif token.is_multipart():
        content_type = token

        # well, multipart message should provide
        # some boundary, how could we parse it otherwise?
        boundary = content_type.get_boundary()
        if not boundary:
            raise DecodingError("Multipart message without boundary")

        parts = deque()
        token = iterator.next()

        # we are expecting first boundary for multipart message
        # something is broken otherwise
        if not token.is_boundary() or token != boundary:
            # Tolerated only when the caller asked for it and this multipart
            # is the content of a message container; the caller then gets
            # None instead of a part.
            if allow_bad_mime and parent and parent.is_message_container():
                return None
            raise DecodingError("Multipart message without starting boundary")

        # Collect child parts until the end token or this multipart's own
        # final boundary; each recursive call advances the iterator.
        while True:
            token = iterator.current()
            if token.is_end():
                break
            if token == boundary and token.is_final():
                # Step past the final boundary before leaving the loop.
                iterator.next()
                break
            # Note: allow_bad_mime is not forwarded to child parts.
            parts.append(traverse(token, iterator, content_type))

        return make_part(content_type=content_type,
                         start=pointer,
                         end=token,
                         iterator=iterator,
                         parts=parts,
                         parent=parent)

    # this is a weird mime part, actually
    # it can contain multiple headers
    # separated by newlines, so we grab them here
    elif token.is_delivery_status():

        # Accepted only directly inside a multipart parent.
        if parent and parent.is_multipart():
            # Same skipping as for a singlepart: ignore content-type tokens
            # until a token of another kind ends the part.
            while True:
                iterator.check()
                end = iterator.next()
                if not end.is_content_type():
                    break
        else:
            raise DecodingError("Malformed delivery status message")

        return make_part(content_type=token,
                         start=pointer,
                         end=end,
                         iterator=iterator,
                         parent=parent)

    # this is a message container that holds
    # a message inside, delimited from parent
    # headers by newline
    elif token.is_message_container():
        # Delivery notification body can contain all sorts of bad MIME.
        allow_bad_mime = parent and parent.is_delivery_report()

        # Parse the enclosed message with this container as its parent. A
        # None result (bad MIME tolerated) downgrades this part to the
        # default content type.
        enclosed = traverse(pointer, iterator, token, allow_bad_mime)
        return make_part(
            content_type=token if enclosed else default_content_type(),
            start=pointer,
            end=iterator.current(),
            iterator=iterator,
            enclosed=enclosed,
            parent=parent)

    # this part contains headers separated by newlines,
    # grab these headers and enclose them in one part
    elif token.is_headers_container():
        enclosed = grab_headers(pointer, iterator, token)
        return make_part(content_type=token,
                         start=pointer,
                         end=iterator.current(),
                         iterator=iterator,
                         enclosed=enclosed,
                         parent=parent)