def test_check_for_delimiter(self):
     """``check_for_delimiter`` returns ``None`` for the simple CSV fixture.

     The comparable points at ``simple_csv_file.csv`` under
     ``MOCK_DATA_DIR`` and is configured with utf-8 encoding and a comma
     delimiter before being handed to the validator.
     """
     subject = Compare()
     csv_path = os.path.join(self.MOCK_DATA_DIR, "simple_csv_file.csv")
     subject.file_name = csv_path
     subject.encoding = "utf-8"
     subject.delimiter = ","

     outcome = input_validator.check_for_delimiter(subject)
     assert outcome is None
 def test_check_for_non_comma_delimiter(self):
     """``check_for_delimiter`` raises ``AppErrorHandler`` for the non-CSV fixture.

     The comparable points at ``none_csv_file.txt`` under ``MOCK_DATA_DIR``
     while still declaring utf-8 encoding and a comma delimiter.
     """
     subject = Compare()
     text_path = os.path.join(self.MOCK_DATA_DIR, "none_csv_file.txt")
     subject.file_name = text_path
     subject.encoding = "utf-8"
     subject.delimiter = ","

     with pytest.raises(AppErrorHandler):
         input_validator.check_for_delimiter(subject)
 def test_verify_file_encoding_when_can_detect_encoding(self):
     """``check_for_file_encoding`` returns ``None`` when the declared encoding matches.

     Every name yielded by ``create_mock_data_file_for_encoding_types`` is
     used twice: as the stem of the fixture file under ``MOCK_DATA_DIR`` and
     as the encoding declared on the comparable.
     """
     # NOTE(review): presumably the helper also writes one ``<name>.csv``
     # fixture per encoding name; confirm against its definition.
     encoding_names = self.create_mock_data_file_for_encoding_types(
         "failing_encoding_names.csv")
     subject = Compare()
     for name in encoding_names:
         subject.file_name = os.path.join(self.MOCK_DATA_DIR, f'{name}.csv')
         subject.encoding = name
         outcome = input_validator.check_for_file_encoding(subject)
         assert outcome is None
 def test_verify_file_encoding_when_can_not_detect_encoding(self):
     """``check_for_file_encoding`` raises ``AppErrorHandler`` on an encoding mismatch.

     Every name yielded by ``create_mock_data_file_for_encoding_types`` is
     used as the stem of a fixture file under ``MOCK_DATA_DIR``, while the
     comparable always declares ``'ascii'`` as its encoding. The validator
     is expected to reject each of those files.
     """
     # NOTE(review): presumably the helper also writes one ``<name>.csv``
     # fixture per encoding name; confirm against its definition.
     encoding_names = self.create_mock_data_file_for_encoding_types(
         "failing_encoding_names.csv")
     comparator_obj = Compare()
     for encoding_val in encoding_names:
         comparator_obj.file_name = os.path.join(self.MOCK_DATA_DIR,
                                                 f'{encoding_val}.csv')
         comparator_obj.encoding = 'ascii'
         # Only the call under test lives inside ``pytest.raises`` so that an
         # error raised while preparing the comparable cannot be mistaken
         # for the expected validation failure.
         with pytest.raises(AppErrorHandler):
             input_validator.check_for_file_encoding(comparator_obj)
def set_file_encoding(comparable: Compare):
    """Detect the encoding of ``comparable.file_name`` and store it on ``comparable``.

    The file is opened in binary mode and fed line by line to a
    ``UniversalDetector`` until the detector reports it is done or the file
    is exhausted. On success the detected encoding name is written,
    lower-cased, to ``comparable.encoding``.

    Args:
        comparable: Object whose ``file_name`` is read and whose
            ``encoding`` attribute is set.

    Raises:
        AppErrorHandler: Built from ``AppErrorHandler.unknown_encoding``
            when the detector reports no encoding or a confidence below
            0.5.
    """
    detector = UniversalDetector()
    with open(comparable.file_name, 'rb') as file:
        # Iterate the handle lazily instead of ``readlines()`` so the whole
        # file is not loaded into memory when detection finishes early.
        for line in file:
            detector.feed(line)
            if detector.done:
                break
    detector.close()

    result = detector.result
    detected = result["encoding"]
    # Test for a missing encoding first so the numeric comparison is only
    # reached when the detector actually produced a candidate.
    if detected is None or result["confidence"] < 0.5:
        raise AppErrorHandler(AppErrorHandler.unknown_encoding)
    comparable.encoding = detected.lower()