def test_check_for_delimiter(self):
    """check_for_delimiter returns None for ``simple_csv_file.csv`` with ','."""
    # Arrange: describe the fixture file, its encoding and the delimiter.
    subject = Compare()
    subject.file_name = os.path.join(
        self.MOCK_DATA_DIR, "simple_csv_file.csv"
    )
    subject.encoding = "utf-8"
    subject.delimiter = ","

    # Act
    outcome = input_validator.check_for_delimiter(subject)

    # Assert: the validator signals success by returning None.
    assert outcome is None
def test_check_for_non_comma_delimiter(self):
    """check_for_delimiter raises AppErrorHandler for ``none_csv_file.txt`` with ','."""
    fixture_path = os.path.join(self.MOCK_DATA_DIR, "none_csv_file.txt")

    subject = Compare()
    # Attributes are applied in the same order as plain assignments would be.
    settings = (
        ("file_name", fixture_path),
        ("encoding", "utf-8"),
        ("delimiter", ","),
    )
    for attribute, value in settings:
        setattr(subject, attribute, value)

    with pytest.raises(AppErrorHandler):
        input_validator.check_for_delimiter(subject)
def test_verify_file_encoding_when_can_detect_encoding(self):
    """check_for_file_encoding returns None when the declared encoding is the file's own.

    Each value yielded by ``create_mock_data_file_for_encoding_types`` is used
    both as the stem of a ``<name>.csv`` fixture under ``MOCK_DATA_DIR`` and as
    the encoding declared on the ``Compare`` object.
    """
    # NOTE(review): presumably the helper also creates the per-encoding
    # fixture files on disk - confirm against its definition.
    encoding_names = self.create_mock_data_file_for_encoding_types(
        "failing_encoding_names.csv"
    )

    # A single Compare instance is reused across all iterations.
    subject = Compare()
    for encoding_val in encoding_names:
        subject.file_name = os.path.join(
            self.MOCK_DATA_DIR, f'{encoding_val}.csv'
        )
        subject.encoding = encoding_val

        outcome = input_validator.check_for_file_encoding(subject)
        assert outcome is None
def test_verify_file_encoding_when_can_not_detect_encoding(self):
    """check_for_file_encoding raises AppErrorHandler when 'ascii' is declared.

    Each value yielded by ``create_mock_data_file_for_encoding_types`` names a
    ``<name>.csv`` fixture under ``MOCK_DATA_DIR``; the ``Compare`` object is
    pointed at that file but declared as ``'ascii'``.
    """
    # NOTE(review): presumably the helper also creates the per-encoding
    # fixture files on disk - confirm against its definition.
    encoding_names = self.create_mock_data_file_for_encoding_types(
        "failing_encoding_names.csv"
    )

    # A single Compare instance is reused across all iterations.
    comparator_obj = Compare()
    for encoding_val in encoding_names:
        # Setup stays outside ``pytest.raises`` so that only the call under
        # test can satisfy the expected exception.
        comparator_obj.file_name = os.path.join(
            self.MOCK_DATA_DIR, f'{encoding_val}.csv'
        )
        comparator_obj.encoding = 'ascii'

        with pytest.raises(AppErrorHandler):
            input_validator.check_for_file_encoding(comparator_obj)
def set_file_encoding(comparable: Compare) -> None:
    """Detect the encoding of ``comparable.file_name`` and store it on the object.

    The file is read in binary mode and fed line by line to a
    ``UniversalDetector`` until the detector reports it is done or the file is
    exhausted. On success ``comparable.encoding`` is set to the detected
    encoding name, lower-cased.

    Args:
        comparable: Object whose ``file_name`` is read and whose ``encoding``
            attribute is overwritten with the detection result.

    Raises:
        AppErrorHandler: Constructed with ``AppErrorHandler.unknown_encoding``
            when no encoding was detected or the detector's confidence is
            below 0.5.
    """
    detector = UniversalDetector()
    with open(comparable.file_name, 'rb') as file:
        # Iterate the handle lazily: the loop normally stops early, so there
        # is no reason to load the whole file with readlines().
        for line in file:
            detector.feed(line)
            if detector.done:
                break
    # close() finalises the detector's result once feeding has stopped.
    detector.close()

    result = detector.result
    # Test for a missing encoding first so the confidence comparison is only
    # evaluated when a detection result exists.
    if result["encoding"] is None or result["confidence"] < 0.5:
        raise AppErrorHandler(AppErrorHandler.unknown_encoding)

    comparable.encoding = result['encoding'].lower()