Python filter_ascii Examples

Programming Language: Python

Namespace/Package Name: textfilter

Method/Function: filter_ascii

Examples at hotexamples.com: 4

Python filter_ascii - 4 examples found. These are the top-rated real-world Python examples of textfilter.filter_ascii, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: contextgenerator.py Project: nmaier/amo-validator

    def _format_line(self, line, column=0, rel_line=1):
        """Return line `line` of `self.data`, shortened for display.

        Trailing whitespace is dropped.  When the text without its leading
        whitespace is longer than 140 characters it is cut down to a
        140-character window, with "..." marking each side that was removed:

        * `rel_line` 0 keeps the last 140 characters.
        * `rel_line` 1 keeps the window around `column`: the first 140
          characters when `column` is below 70, the last 140 when it is
          within 70 of the end, otherwise 70 characters either side.
        * `rel_line` 2 keeps the first 140 characters.

        Any other `rel_line` leaves the text uncut.  The original leading
        whitespace is put back and the result is passed through
        `textfilter.filter_ascii` before being returned.
        """

        source = self.data[line].rstrip()
        text = source.lstrip()
        text_len = len(text)
        # Whatever lstrip() removed is the indentation to restore later.
        indent = source[:len(source) - text_len]

        if text_len > 140:
            around_error = rel_line == 1
            if rel_line == 2 or (around_error and column < 70):
                # Keep the start of the line.
                text = "%s ..." % text[:140]
            elif rel_line == 0 or (around_error
                                   and column > text_len - 70):
                # Keep the end of the line.
                text = "... %s" % text[-140:]
            elif around_error:
                # Keep a window centred on the error column.
                text = "... %s ..." % text[column - 70:column + 70]

        return textfilter.filter_ascii("%s%s" % (indent, text))

Example #2

Show file

File: errorbundler.py Project: nmaier/amo-validator

    def _save_message(self, stack, type_, message, context=None):
        """Append `message` to `stack` and register it in the message tree.

        The message dict is updated in place: it is given a fresh hex
        `uid`, a `context` entry, ASCII-filtered `message` and
        `description` values, and, when its `tier` is None, the bundle's
        current tier.  If the message has a non-empty `id`, every element
        of that id gets its `"__<type_>"` counter incremented along the
        nested `self.message_tree`, and the uid is recorded under the
        final element.
        """

        message_uid = uuid.uuid4().hex
        message["uid"] = message_uid

        # A tuple context is stored verbatim; any other non-None context is
        # asked to produce one for the message's position.
        if context is None:
            message["context"] = None
        elif isinstance(context, tuple):
            message["context"] = context
        else:
            message["context"] = context.get_context(
                line=message["line"], column=message["column"])

        for field in ("message", "description"):
            message[field] = filter_ascii(message[field])

        stack.append(message)

        # Messages without an explicit tier inherit the current one.
        if message["tier"] is None:
            message["tier"] = self.tier

        id_path = message["id"]
        if not id_path:
            return

        # Walk (and lazily create) one tree level per id element, bumping
        # the per-type counter at each level on the way down.
        node = self.message_tree
        previous = None
        for part in id_path:
            if previous is not None:
                node = node[previous]
            if part not in node:
                node[part] = {"__errors": 0,
                              "__warnings": 0,
                              "__notices": 0,
                              "__messages": []}
            node[part]["__%s" % type_] += 1
            previous = part

        node[previous]['__messages'].append(message_uid)

Example #3

Show file

def decode(data):
    """
    Turn a byte string into unicode, guessing its encoding.

    Non-`str` input is returned unchanged.  Otherwise the attempts are, in
    order: a known BOM from UNICODES (the BOM is stripped and undecodable
    bytes ignored), UTF-8, latin_1, each of COMMON_ENCODINGS, and finally
    the output of `textfilter.filter_ascii` decoded with errors replaced.
    """

    def attempt(encoding):
        # None signals failure; an empty unicode result is still a success.
        try:
            return unicode(data, encoding)
        except UnicodeDecodeError:
            return None

    # Nothing to do for values that are not byte strings.
    if not isinstance(data, str):
        return data

    # A recognised BOM settles the encoding outright.
    for bom, encoding in UNICODES:
        if data.startswith(bom):
            return unicode(data[len(bom):], encoding, "ignore")

    # Plain UTF-8 is tried first.
    decoded = attempt("utf-8")
    if decoded is not None:
        return decoded

    # latin_1 goes before the common encodings because such data can be
    # matched as UTF-16; this check is far cheaper than running chardet.
    # NOTE(review): for a Python 2 byte string every ord(c) is presumably
    # already below 256, which would make this branch always succeed and
    # the fallbacks below unreachable -- confirm.
    if all(ord(char) < 256 for char in data):
        decoded = attempt("latin_1")
        if decoded is not None:
            return decoded

    # Fall back to the list of common encodings.
    for encoding in COMMON_ENCODINGS:
        decoded = attempt(encoding)
        if decoded is not None:
            return decoded

    # Last resort: filter the data and decode it leniently.
    return unicode(textfilter.filter_ascii(data), errors="replace")

Example #4

Show file

File: unicodehelper.py Project: Archaeopteryx/amo-validator

def decode(data):
    """
    Convert byte-string `data` to unicode using lightweight charset
    detection, removing any leading unicode BOM.

    Values that are not `str` instances are handed back as-is.  A BOM
    listed in UNICODES selects the encoding directly; otherwise UTF-8,
    latin_1 and the entries of COMMON_ENCODINGS are tried in turn.  If
    none of them decodes the data, it is passed through
    `textfilter.filter_ascii` and decoded with errors replaced.
    """

    def candidate_encodings():
        # Generated lazily so each check only runs once the previous
        # candidate has failed.
        yield "utf-8"
        # latin_1 is tested ahead of the common encodings because it can
        # be matched as UTF-16.  A bit of a hack, but much faster than
        # using chardet.
        # NOTE(review): presumably always true for a Python 2 byte
        # string, so later candidates may never be reached -- confirm.
        if all(ord(byte) < 256 for byte in data):
            yield "latin_1"
        for common in COMMON_ENCODINGS:
            yield common

    # Skip the work entirely for anything that is not a byte string.
    if not isinstance(data, str):
        return data

    # Standard unicode BOMs identify the encoding on their own.
    for marker, marker_encoding in UNICODES:
        if data.startswith(marker):
            return unicode(data[len(marker):], marker_encoding, "ignore")

    for encoding in candidate_encodings():
        try:
            return unicode(data, encoding)
        except UnicodeDecodeError:
            continue

    # Nothing decoded cleanly, so filter the data and replace errors.
    return unicode(textfilter.filter_ascii(data), errors="replace")