def test_clean_up_invalid_arg_in_str():
    """
    Check that clean_up rejects an in_str argument that is not a string
    by raising a TypeError.
    """
    manager = TextManager()
    not_a_string = 123
    with pytest.raises(TypeError):
        manager.clean_up(not_a_string)
def test_clean_up_invalid_arg_deplorables_3():
    """
    Check that clean_up raises a TypeError when the deplorables argument is
    a list that is not made up exclusively of strings.
    """
    manager = TextManager()
    # The last element is a float, which makes the list invalid.
    mixed_deplorables = ['almost', 'but not quite', 3.3]
    with pytest.raises(TypeError):
        manager.clean_up('', mixed_deplorables)
def test_clean_up_invalid_arg_deplorables_1():
    """
    Check that clean_up raises a TypeError when the deplorables argument is
    not a list at all (here: a plain string).
    """
    manager = TextManager()
    not_a_list = 'not legit'
    with pytest.raises(TypeError):
        manager.clean_up('legit', not_a_list)
def test_clean_up_remove_multiple_spaces():
    """
    Test the removal of multiple spaces in the clean up function.

    The input contains runs of several consecutive spaces between words;
    the cleaned text is expected to contain single spaces only.
    """
    txt_man = TextManager()
    # The input must actually contain repeated spaces, otherwise this test
    # passes even if clean_up never collapses them. Runs are placed between
    # words only, so the test does not depend on how spaces next to a line
    # break are treated.
    in_str = (
        'Identity     Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John   Michael      Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John Michael Robert'
    )
    assert txt_man.clean_up(in_str) == expected
def test_clean_up_remove_default():
    """
    Check which characters clean_up strips when no additional list of
    characters is supplied.
    """
    manager = TextManager()
    noisy_text = (
        'Identity #Number\n'
        '123456789...\n'
        '$Sur_name&\n'
        '\\/Doe.\n'
        'Names*\n'
        'John-Michae|l\n'
        'R%obert+'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    cleaned = manager.clean_up(noisy_text)
    assert cleaned == expected
def test_clean_up_unicode_support():
    """
    Check that clean_up leaves accented (non-ASCII) letters untouched.
    """
    manager = TextManager()
    accented_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John-Micháel\n'
        'Robert'
    )
    # The text is already clean, so it must come back unchanged.
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John-Micháel\n'
        'Robert'
    )
    cleaned = manager.clean_up(accented_text)
    assert cleaned == expected
def test_clean_up_remove_specified():
    """
    Check that clean_up also strips the characters passed in the additional
    list of characters to remove.
    """
    manager = TextManager()
    extra_deplorables = ['+', 'ö']
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John+Michael\n'
        'Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'De\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, extra_deplorables)
    assert cleaned == expected
def test_clean_up_remove_specified_sanitise():
    """
    Check that clean_up removes additionally specified characters even when
    they are characters with a special meaning inside a regex character
    class, namely ], [, ^ and -, i.e. that such characters are escaped.
    """
    manager = TextManager()
    regex_control_chars = [']', '[', '^', '-']
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe[^-]\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, regex_control_chars)
    assert cleaned == expected
def test_clean_up_empty_in_str():
    """
    Check that passing an empty string to clean_up yields an empty string.
    """
    manager = TextManager()
    cleaned = manager.clean_up('')
    assert cleaned == ''
def extract(self, img):
    """
    Extract the text found on an image of an identification document.

    The image is perspective-transformed, scanned for a barcode, sent
    through the text extraction pipeline built for its identification type,
    handed to pytesseract for OCR, and the recognised text is cleaned up.

    Author(s):
        Nicolai van Niekerk
    Args:
        img: The image of the ID that contains the text to be extracted.
    Returns:
        For identification type 'studentcard', a dict with the keys
        'up_card', 'text_dump' and 'barcode_dump'. For every other type,
        whatever TextManager.dictify returns for the cleaned text and the
        barcode data.
    Side effects:
        Updates self.remove_face when the 'remove_face' preference is
        present, writes the transformed image to DESKTOP + "/output/3.png",
        and writes a temporary "<pid>.png" file that is removed again once
        OCR has finished or failed.
    """
    if 'remove_face' in self.preferences:
        # The preference arrives as a string; only 'true' enables removal.
        self.remove_face = self.preferences['remove_face'] == 'true'
    logger.debug('self.remove_face: %s', self.remove_face)

    simplification_manager = SimplificationManager()
    barcode_manager = BarCodeManager()
    data = {}
    separator = '-' * 50

    # Perform perspective transformation and read from barcode.
    logger.info('Performing perspective transformation...')
    image = simplification_manager.perspectiveTransformation(img)
    cv2.imwrite(DESKTOP + "/output/3.png", image)
    barcode_data_found, barcode_scan_data, barcoded_image = (
        barcode_manager.get_barcode_info(image)
    )
    if barcode_data_found:
        logger.info('Barcode successfully scanned')
        data = {
            'identity_number': barcode_scan_data.decode('utf-8'),
        }

    # Process image: an explicit 'id_type' preference skips template matching.
    if 'id_type' in self.preferences:
        identification_type = self.preferences['id_type']
        logger.info("No template matching required")
        logger.info("Identification type: %s", identification_type)
    else:
        template_match = TemplateMatching()
        logger.info('Performing template matching...')
        identification_type = template_match.identify(barcoded_image)

    logger.info('Constructing text extraction pipeline')
    pipeline = BuildDirector.construct_text_extract_pipeline(
        self.preferences, identification_type)
    image = pipeline.process_text_extraction(barcoded_image, self.remove_face)

    # Run OCR via a temporary image file named after the process id.
    filename = "{}.png".format(os.getpid())
    try:
        cv2.imwrite(filename, image)
        # Close the image handle before the file is deleted.
        with Image.open(filename) as ocr_input:
            text = pytesseract.image_to_string(ocr_input)
    finally:
        # Always clean up the temporary file, even when OCR raises.
        if os.path.exists(filename):
            os.remove(filename)

    text_manager = TextManager()
    # Log the uncleaned string to terminal.
    # This is for demonstration purposes.
    logger.debug(separator)
    logger.debug('String to clean:')
    logger.debug(separator)
    for log_line in text.split('\n'):
        logger.debug(log_line)
    logger.debug(separator)

    logger.info('Cleaning up text...')
    # Clean the OCR output text.
    clean_text = text_manager.clean_up(text)

    # Log the cleaned string to terminal.
    # This is for demonstration purposes.
    logger.debug(separator)
    logger.debug('Cleaned text:')
    logger.debug(separator)
    for log_line in clean_text.split('\n'):
        logger.debug(log_line)
    logger.debug(separator)

    # Cater for UP student/staff cards.
    if identification_type == 'studentcard':
        return {
            # Used to be able to reliably check if a response is a UP card
            # from client-side.
            'up_card': True,
            # Dump extracted and cleaned text.
            'text_dump': clean_text,
            # Dump the barcode data (None when no barcode was scanned).
            'barcode_dump': data['identity_number'] if data else None
        }

    # Dictify cleaned text.
    logger.info('Placing extracted text in a dictionary...')
    id_details = text_manager.dictify(clean_text, data)

    # Log the dictified extracted text to terminal.
    # This is for demonstration purposes.
    logger.debug(separator)
    logger.debug('Extracted ID details:')
    logger.debug(separator)
    for id_details_line in prettify_json_message(id_details).split('\n'):
        logger.debug(id_details_line)
    logger.debug(separator)

    # Return the extracted ID information.
    return id_details