def test_explain_false_handler_set_behavior(self, caplog):
     """Check logging when a handler is installed manually and ``explain`` is off.

     A handler is registered through ``set_logging_handler`` at ``TRACE``
     level, then detection runs with ``explain=False``. The test expects a
     ``logging.StreamHandler`` among ``self.logger.handlers``, only
     "Level 5" / "DEBUG" records in ``caplog``, and the ASCII conclusion
     message in the captured text.
     """
     set_logging_handler(level=TRACE, format_string="%(message)s")
     sample = b'This is a test sequence of bytes that should be sufficient'
     from_bytes(sample, steps=1, chunk_size=50, explain=False)

     # At least one stream handler must be attached to the logger.
     stream_handlers = [
         handler
         for handler in self.logger.handlers
         if isinstance(handler, logging.StreamHandler)
     ]
     assert stream_handlers

     accepted_levels = ["Level 5", "DEBUG"]
     for log_record in caplog.records:
         assert log_record.levelname in accepted_levels

     assert "Encoding detection: ascii is most likely the one." in caplog.text
def test_alphabets_property():
    """Check the ``alphabets`` property of the best match for Latin text with emoji.

    Both "Basic Latin" and "Emoticons range(Emoji)" must be listed, and
    "Basic Latin" must appear exactly once.
    """
    payload = "😀 Hello World! How affairs are going? 😀".encode("utf_8")
    match = from_bytes(payload).best()

    for expected_range in ("Basic Latin", "Emoticons range(Emoji)"):
        assert expected_range in match.alphabets

    # The same range must not be reported more than once.
    assert match.alphabets.count("Basic Latin") == 1
def test_empty_but_with_bom_or_sig(payload, expected_encoding):
    """Check detection on a payload expected to decode to an empty string.

    The best match must exist, report ``expected_encoding``, expose the
    original bytes through ``raw``, flag ``byte_order_mark`` as ``True``
    and cast to an empty ``str``.
    """
    match = from_bytes(payload).best()

    assert match is not None, "Empty detection but with SIG/BOM has failed!"
    assert (
        match.encoding == expected_encoding
    ), "Empty detection but with SIG/BOM is wrongly detected!"
    assert (
        match.raw == payload
    ), "The RAW property should contain the original payload given for detection."
    assert (
        match.byte_order_mark is True
    ), "The BOM/SIG property should return True"
    assert str(match) == "", "The cast to str SHOULD be empty"
def test_bool_matches():
    """Check the truthiness of ``CharsetMatches`` containers.

    The result of ``from_bytes(b'')`` must be truthy, while a
    ``CharsetMatches`` built from an empty list must be falsy.
    """
    from_detection = from_bytes(b'')
    built_empty = CharsetMatches([])

    truthy = bool(from_detection)
    assert truthy is True, "Bool behaviour of CharsetMatches altered, should be True"

    falsy = bool(built_empty)
    assert falsy is False, "Bool behaviour of CharsetMatches altered, should be False"
def test_mb_cutting_chk():
    """Check detection when multi-byte sequences get cut during chunking.

    This payload should be wrongfully split, and the autofix should run
    automatically on chunk extraction. With ``cp_isolation`` restricted to
    cp949, exactly one match is expected and it must be cp949.
    """
    repeated_unit = (
        b"\xbf\xaa\xbb\xe7\xc0\xfb    \xbf\xb9\xbc\xf6 "
        b"   \xbf\xac\xb1\xb8\xc0\xda\xb5\xe9\xc0\xba  \xba\xb9\xc0\xbd\xbc\xad\xb3\xaa "
    )
    payload = repeated_unit * 128

    matches = from_bytes(payload, cp_isolation=["cp949"])
    top_match = matches.best()

    match_count = len(matches)
    assert match_count == 1, "cp isolation is set and given seq should be clear CP949!"
    assert top_match.encoding == "cp949"
def test_obviously_utf8_content(payload):
    """Check that ``payload`` yields a best match whose encoding is ``utf_8``."""
    match = from_bytes(payload).best()

    assert match is not None, "Dead-simple UTF-8 detection has failed!"
    assert (
        match.encoding == "utf_8"
    ), "Dead-simple UTF-8 detection is wrongly detected!"
def test_obviously_ascii_content(payload):
    """Check that ``payload`` yields a best match whose encoding is ``ascii``."""
    match = from_bytes(payload).best()

    assert match is not None, "Dead-simple ASCII detection has failed!"
    assert (
        match.encoding == "ascii"
    ), "Dead-simple ASCII detection is wrongly detected!"
def test_empty():
    """Check detection on an empty bytes payload.

    A best match must still be returned, reported as ``utf_8``, with no
    alphabets listed.
    """
    match = from_bytes(b'').best()

    assert match is not None, "Empty bytes payload SHOULD NOT return None"
    assert (
        match.encoding == "utf_8"
    ), "Empty bytes payload SHOULD be guessed as UTF-8 (arbitrary)"

    alphabet_count = len(match.alphabets)
    assert alphabet_count == 0, ""
def test_content_with_bom_or_sig(payload, expected_encoding):
    """Check detection on a payload expected to carry a BOM/SIG.

    The best match must exist, report ``expected_encoding`` and flag
    ``byte_order_mark`` as ``True``.
    """
    match = from_bytes(payload).best()

    assert match is not None, "Detection but with SIG/BOM has failed!"
    assert (
        match.encoding == expected_encoding
    ), "Detection but with SIG/BOM is wrongly detected!"
    assert (
        match.byte_order_mark is True
    ), "The BOM/SIG property should return True"
 def test_explain_true_behavior(self, caplog):
     """Check logging when detection runs with ``explain=True``.

     After the call, ``explain_handler`` must not be among
     ``self.logger.handlers``, and every record captured by ``caplog`` must
     be at "Level 5" or "DEBUG".
     """
     sample = b'This is a test sequence of bytes that should be sufficient'
     from_bytes(sample, steps=1, chunk_size=50, explain=True)

     attached_handlers = self.logger.handlers
     assert explain_handler not in attached_handlers

     accepted_levels = ["Level 5", "DEBUG"]
     for log_record in caplog.records:
         assert log_record.levelname in accepted_levels