def convert_to_utf8(self, filename):
    """Convert *filename* from ANSI to UTF-8 in place, keeping a ``.bak`` copy.

    :param filename: path of the file to convert

    Exits the process (status 1) when the file cannot be read, as the
    original comment promised for IOError.
    """
    # Read the whole file up front; close the handle instead of leaking
    # the anonymous object returned by open().
    try:
        src = open(filename, 'r')
        try:
            contents = src.read()
        finally:
            src.close()
    except IOError as e:
        # Surface the reason before exiting instead of dying silently.
        print(e)
        sys.exit(1)
    # Detect the encoding once and reuse the result (the original called
    # fastchardet.detect twice on the same data).
    detected = fastchardet.detect(contents)
    print(detected)
    encoding_ = detected['encoding']
    if encoding_ and encoding_ == "ANSI":
        data = contents.decode(encoding_)
        # Back up the original next to it as <absolute path>.bak.
        fpath = os.path.abspath(filename)
        shutil.copy(filename, fpath + '.bak')
        # Rewrite the original file as UTF-8, always closing the handle.
        out = open(filename, 'w')
        try:
            out.write(data.encode('utf-8'))
        except Exception as e:
            print(e)
        finally:
            out.close()
def convert_to_utf8(self, filename):
    # try to open the file and exit if some IOError occurs
    """Convert an ANSI-encoded file to UTF-8 in place, backing it up first.

    :param filename: path of the file to convert
    """
    try:
        # NOTE(review): the file object returned by open() is never
        # closed here — the handle leaks; only its contents are kept.
        f = open(filename, 'r').read()
        print fastchardet.detect(f)
    except Exception:
        # Any failure to read aborts the whole program with status 1.
        sys.exit(1)
    try:
        encoding_ = fastchardet.detect(f)['encoding']
        # Only rewrite when the detector reports "ANSI".
        if encoding_ and encoding_ == "ANSI":
            data = f.decode(encoding_)
            # now get the absolute path of our filename and append .bak
            # to the end of it (for our backup file)
            fpath = os.path.abspath(filename)
            newfilename = fpath + '.bak'
            # and make our backup file with shutil
            shutil.copy(filename, newfilename)
            # and at last convert it to utf-8
            f = open(filename, 'w')
            try:
                f.write(data.encode('utf-8'))
            except Exception, e:
                print e
    finally:
        # NOTE(review): when the encoding is not "ANSI", `f` is still the
        # string read above, so f.close() would raise AttributeError —
        # confirm against callers whether that path is ever taken.
        f.close()
def _parse_l10n_doc(name, doc, no_encoding=False):
    """Parse an L10n document and tag it with encoding expectations."""
    ext = name.split('.')[-1].lower()
    parser_map = {'dtd': dtd.DTDParser,
                  'properties': properties.PropertiesParser}
    if ext not in parser_map:
        # Unrecognized file type; nothing we can parse.
        return None
    # Encodings we consider acceptable for these files.
    acceptable = ('ASCII', 'UTF_8')
    parsed = parser_map[ext](StringIO(doc))
    # The caller may ask to skip the encoding check entirely.
    if no_encoding:
        return parsed
    # Try the cheap UTF-8 decode first; it is much faster than
    # fastchardet and succeeds more often than it fails.
    try:
        doc.decode('utf-8')
        detected = 'UTF_8'
    except UnicodeDecodeError:
        detected = fastchardet.detect(doc)['encoding'].upper()
    parsed.expected_encoding = detected in acceptable
    parsed.suitable_encoding = acceptable
    return parsed
def test_unicode():
    """Verify detection yields the 'unicode' marker for unicode input."""
    result = fastchardet.detect(unicode('foo'))
    assert result['encoding'] == 'unicode'
def _parse_l10n_doc(name, doc, no_encoding=False):
    'Parses an L10n document.'
    # Choose the parser class from the file extension.
    extension = name.split('.')[-1].lower()
    handlers = {
        'dtd': dtd.DTDParser,
        'properties': properties.PropertiesParser
    }
    # These are expected encodings for the various files.
    handler_formats = ('ASCII', 'UTF_8')
    if extension not in handlers:
        # Not a file type we know how to parse.
        return None
    wrapper = StringIO(doc)
    loc_doc = handlers[extension](wrapper)
    # Allow the parse to specify files to skip for encoding checks
    if not no_encoding:
        try:
            # This is much faster than fastchardet, and succeeds more often
            # than fails.
            doc.decode('utf-8')
            encoding = 'UTF_8'
        except UnicodeDecodeError:
            encoding = fastchardet.detect(doc)['encoding'].upper()
        # True when the detected encoding is one of the accepted formats.
        loc_doc.expected_encoding = encoding in handler_formats
        loc_doc.suitable_encoding = handler_formats
    return loc_doc
def test_unicode():
    """Passing a unicode object should be reported as encoding 'unicode'."""
    detected = fastchardet.detect(unicode("foo"))
    assert detected["encoding"] == "unicode"
def test_esoteric():
    """Make sure that fastchardet can detect other encodings."""
    def detected(code):
        # Return only the encoding name for the given byte string.
        return fastchardet.detect(code)['encoding']
    # High-byte input should be reported as windows-1252.
    print(detected('High Byte:\x91'))
    assert detected('High Byte:\x91') == 'windows-1252'
    # A UTF-8 sequence without a BOM should still be recognized.
    print(detected('\xc2\xbc + \xc2\xbd = \xcd\xbe'))
    assert detected('\xc2\xbc + \xc2\xbd = \xcd\xbe') == 'utf_8'
def test_esoteric():
    """Make sure that fastchardet can detect other encodings."""
    # Shorthand: detect and return only the encoding name.
    a = lambda code: fastchardet.detect(code)["encoding"]
    # High Bytes
    print a("High Byte:\x91")
    assert a("High Byte:\x91") == "windows-1252"
    # UTF-8 without BOM
    print a("\xc2\xbc + \xc2\xbd = \xcd\xbe")
    assert a("\xc2\xbc + \xc2\xbd = \xcd\xbe") == "utf_8"
def _parse_l10n_doc(name, doc, no_encoding=False):
    """Parse an L10n document, recording whether its encoding is expected."""
    ext = name.split(".")[-1].lower()
    parser_by_ext = {"dtd": dtd.DTDParser,
                     "properties": properties.PropertiesParser}
    if ext not in parser_by_ext:
        # Unknown extension: nothing we can parse.
        return None
    # Encodings considered acceptable for these files.
    accepted = ("ASCII", "UTF_8")
    result = parser_by_ext[ext](StringIO(doc))
    if not no_encoding:
        # Detect the document's encoding and flag whether it is acceptable.
        found = fastchardet.detect(doc)["encoding"].upper()
        result.expected_encoding = found in accepted
        result.suitable_encoding = accepted
    return result
def _parse_l10n_doc(name, doc, no_encoding=False):
    "Parses an L10n document."
    # Pick the parser class based on the file extension.
    extension = name.split(".")[-1].lower()
    handlers = {
        "dtd": dtd.DTDParser,
        "properties": properties.PropertiesParser
    }
    # These are expected encodings for the various files.
    handler_formats = ("ASCII", "UTF_8")
    if extension not in handlers:
        # Not a supported document type.
        return None
    wrapper = StringIO(doc)
    loc_doc = handlers[extension](wrapper)
    # Allow the parse to specify files to skip for encoding checks
    if not no_encoding:
        encoding = fastchardet.detect(doc)["encoding"].upper()
        # True when the detected encoding is one of the accepted formats.
        loc_doc.expected_encoding = encoding in handler_formats
        loc_doc.suitable_encoding = handler_formats
    return loc_doc
def test_utf8():
    """Determine that fastchardet properly detects UTF-8."""
    # The \xEF\xBB\xBF prefix is the UTF-8 byte-order mark.
    payload = """\xEF\xBB\xBF Haldo, UTF-8 """
    assert fastchardet.detect(payload)['encoding'] == 'utf_8'
def test_ascii():
    """Determines that fastchardet detects ASCII properly."""
    # Pure 7-bit text should be reported as plain ascii.
    result = fastchardet.detect('This is plain ASCII')
    assert result['encoding'] == 'ascii'
def test_utfn():
    """Determine that fastchardet properly detects UTF-N."""
    # \xFF\xFE\x00\x00 is a non-UTF-8 byte-order mark prefix.
    payload = """\xFF\xFE\x00\x00 Haldo, UTF-Not 8 """
    assert fastchardet.detect(payload)['encoding'] == 'utf_n'
def test_utf8():
    """Determine that fastchardet properly detects UTF-8."""
    # The \xEF\xBB\xBF prefix is the UTF-8 byte-order mark.
    assert fastchardet.detect("""\xEF\xBB\xBF Haldo, UTF-8 """)["encoding"] == "utf_8"
def test_ascii():
    """Determines that fastchardet detects ASCII properly."""
    # Pure 7-bit input should be reported as plain ascii.
    assert fastchardet.detect("This is plain ASCII")["encoding"] == "ascii"
def test_utfn():
    """Determine that fastchardet properly detects UTF-N."""
    # \xFF\xFE\x00\x00 is a byte-order-mark prefix for a non-UTF-8 encoding.
    assert fastchardet.detect("""\xFF\xFE\x00\x00 Haldo, UTF-Not 8 """)["encoding"] == "utf_n"