Example #1
0
def test_clean_up_invalid_arg_in_str():
    """
    Verify that clean_up raises a TypeError when in_str is not a string (an int is passed here).
    """
    manager = TextManager()
    not_a_string = 123
    with pytest.raises(TypeError):
        manager.clean_up(not_a_string)
Example #2
0
def test_clean_up_invalid_arg_deplorables_3():
    """
    Verify that clean_up raises a TypeError when the deplorables argument is a list
    that contains an element which is not a string (a float is mixed in here).
    """
    manager = TextManager()
    mixed_deplorables = ['almost', 'but not quite', 3.3]
    with pytest.raises(TypeError):
        manager.clean_up('', mixed_deplorables)
Example #3
0
def test_clean_up_invalid_arg_deplorables_1():
    """
    Verify that clean_up raises a TypeError when the deplorables argument is not a list
    (a plain string is passed here).
    """
    manager = TextManager()
    not_a_list = 'not legit'
    with pytest.raises(TypeError):
        manager.clean_up('legit', not_a_list)
Example #4
0
def test_clean_up_remove_multiple_spaces():
    """
    Verify that clean_up collapses runs of consecutive spaces into a single space.
    """
    manager = TextManager()
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John     Michael   Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John Michael Robert'
    )
    cleaned = manager.clean_up(raw_text)
    assert cleaned == expected
Example #5
0
def test_clean_up_remove_default():
    """
    Verify the characters that clean_up removes when no extra list of characters is supplied.
    The input is sprinkled with symbols that are expected to be stripped, while the hyphen
    in 'John-Michael' is expected to be kept.
    """
    manager = TextManager()
    raw_text = (
        'Identity #Number\n'
        '123456789...\n'
        '$Sur_name&\n'
        '\\/Doe.\n'
        'Names*\n'
        'John-Michae|l\n'
        'R%obert+'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text)
    assert cleaned == expected
Example #6
0
def test_clean_up_unicode_support():
    """
    Verify that clean_up leaves accented (non-ASCII) letters untouched: the text is
    expected to come back unchanged.
    """
    manager = TextManager()
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John-Micháel\n'
        'Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John-Micháel\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text)
    assert cleaned == expected
Example #7
0
def test_clean_up_remove_specified():
    """
    Verify that clean_up also removes the characters given in an additional list
    (here '+' and 'ö').
    """
    manager = TextManager()
    extra_removals = ['+', 'ö']
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John+Michael\n'
        'Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'De\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, extra_removals)
    assert cleaned == expected
Example #8
0
def test_clean_up_remove_specified_sanitise():
    """
    Verify that clean_up removes characters from an additional list even when those characters
    are ], [, ^ and -, which the original test describes as control characters of the underlying
    regex that must be escaped.
    """
    manager = TextManager()
    regex_sensitive_removals = [']', '[', '^', '-']
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe[^-]\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, regex_sensitive_removals)
    assert cleaned == expected
Example #9
0
def test_clean_up_empty_in_str():
    """
    Verify that clean_up returns an empty string when it is given an empty string.
    """
    manager = TextManager()
    cleaned = manager.clean_up('')
    assert cleaned == ''
    def extract(self, img):
        """
        Run the text extraction flow on an image of an ID and return what was read from it.

        The image is perspective-transformed and scanned for a barcode. The identification
        type is read from the preferences when 'id_type' is present, otherwise it is
        determined by template matching. A text extraction pipeline is then applied, the
        result is run through OCR, and the OCR output is cleaned. Unless the identification
        type is 'studentcard', the cleaned text is finally placed in a dictionary.

        Author(s):
            Nicolai van Niekerk
        Args:
            img: The image of the ID that contains the text to be extracted
        Returns:
            For a 'studentcard': a dict with the keys 'up_card', 'text_dump' and
            'barcode_dump' ('barcode_dump' is None when no barcode was read).
            Otherwise: id_details, the value returned by TextManager.dictify for the
            cleaned text and the barcode data.
        """

        def log_section(title, lines):
            # Log a titled block of lines at debug level, framed by separator lines.
            # This is for demonstration purposes.
            separator = '-' * 50
            logger.debug(separator)
            logger.debug(title)
            logger.debug(separator)
            for line in lines:
                logger.debug(line)
            logger.debug(separator)

        if 'remove_face' in self.preferences:
            # The preference arrives as a string; only the exact value 'true' enables it.
            self.remove_face = self.preferences['remove_face'] == 'true'
        logger.debug('self.remove_face: ' + str(self.remove_face))

        simplification_manager = SimplificationManager()
        barcode_manager = BarCodeManager()
        data = {}

        # Perform perspective transformation and read from barcode.
        logger.info('Performing perspective transformation...')
        image = simplification_manager.perspectiveTransformation(img)
        # NOTE(review): writes the transformed image to a fixed location under DESKTOP;
        # presumably a debugging aid - confirm whether this should remain.
        cv2.imwrite(DESKTOP + "/output/3.png", image)
        barcode_data_found, barcode_scan_data, barcoded_image = barcode_manager.get_barcode_info(
            image)
        if barcode_data_found:
            logger.info('Barcode successfully scanned')
            data = {
                'identity_number': barcode_scan_data.decode('utf-8'),
            }

        # Process image
        if 'id_type' in self.preferences:
            identification_type = self.preferences['id_type']
            logger.info("No template matching required")
            logger.info("Identification type: " + identification_type)
        else:
            template_match = TemplateMatching()
            logger.info('Performing template matching...')
            identification_type = template_match.identify(barcoded_image)

        logger.info('Constructing text extraction pipeline')
        pipeline = BuildDirector.construct_text_extract_pipeline(
            self.preferences, identification_type)
        image = pipeline.process_text_extraction(barcoded_image,
                                                 self.remove_face)

        # Extract and return text
        # The processed image is handed to the OCR engine through a temporary file named
        # after the current process id.
        filename = "{}.png".format(os.getpid())
        try:
            cv2.imwrite(filename, image)
            # Close the image handle before the file is removed.
            with Image.open(filename) as ocr_input:
                text = pytesseract.image_to_string(ocr_input)
        finally:
            # Always clean up the temporary file, even when OCR fails.
            if os.path.exists(filename):
                os.remove(filename)

        text_manager = TextManager()
        # Log the uncleaned string to terminal.
        log_section('String to clean:', text.split('\n'))
        logger.info('Cleaning up text...')
        # Clean the OCR output text.
        clean_text = text_manager.clean_up(text)
        # Log the cleaned string to terminal.
        log_section('Cleaned text:', clean_text.split('\n'))
        # Cater for UP student/staff cards.
        if identification_type == 'studentcard':
            return {
                # Used to be able to reliably check if a response is a UP card from client-side.
                'up_card': True,
                # Dump extracted and cleaned text.
                'text_dump': clean_text,
                # Dump the barcode data (None when no barcode was read).
                'barcode_dump': data.get('identity_number'),
            }
        # Dictify cleaned text.
        logger.info('Placing extracted text in a dictionary...')
        id_details = text_manager.dictify(clean_text, data)
        # Log the dictified extracted text to terminal.
        log_section('Extracted ID details:',
                    prettify_json_message(id_details).split('\n'))
        # Return the extracted ID information.
        return id_details