Example #1
0
    def __check_yacc(self, check_lex_data=None):
        """Run the syntactic (yacc) check, optionally seeded with lexer data.

        Args:
            check_lex_data: Result of the lexical check. When truthy it is
                handed to the checker through ``set_lex_data`` before the
                grammar check runs.

        Returns:
            Whatever ``CheckSintax.check_correct_grammar()`` returns. It is
            documented by the original author as:
            {
                'latex': self.latex,
                'latex_list': self.latex_list,
                'latex_string': self.latex_string,
                'yacc_errors_history': self.yacc_errors_history,
                'lex_errors_history': self.lex_errors_history,
                'yacc_pure_errors': self.pure_yacc_errors,
                'lex_pure_errors': self.pure_lex_errors  # not being added here
            }
        """
        helpers.debug('[check_grammar.py] __check_yacc()')
        helpers.debug('[check_grammar.py] __check_yacc() | \
            before CheckSintax() ')

        syntax_checker = check_grammar_yacc.CheckSintax()
        syntax_checker.attempts = self.__attempts_grammar
        if check_lex_data:
            syntax_checker.set_lex_data(check_lex_data)

        return syntax_checker.check_correct_grammar()
Example #2
0
 def resize_full_image(self, image):
     """Shrink very wide images to 20% of their width, keeping aspect ratio.

     Images whose width is 4000 pixels or less are returned unchanged.
     """
     helpers.debug('[preprocessing.py] resize_full_image()')
     height, full_width = image.shape[:2]
     if full_width <= 4000:
         return image

     new_width = int(full_width * 20 / 100)
     scale = new_width / float(full_width)
     new_size = (new_width, int(height * scale))
     return cv2.resize(image, new_size)
Example #3
0
 def binarization(self, image):
     """Binarize ``image`` with adaptive mean thresholding.

     A copy of the image is inverted, thresholded (max value 255,
     ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, block size 9, constant 2) and
     inverted back before being returned.
     """
     helpers.debug('[preprocessing.py] binarization()')
     inverted = self.invert(image.copy())
     # A Gaussian variant (ADAPTIVE_THRESH_GAUSSIAN_C, block size 11) was
     # tried previously; the mean variant is the one in use.
     thresholded = cv2.adaptiveThreshold(
         inverted, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2)
     return self.invert(thresholded)
Example #4
0
    def to_gray_denoise(self, image):
        """Return a denoised grayscale array built from a copy of ``image``.

        Three-dimensional input is converted with ``cv2.COLOR_BGR2GRAY``
        first; the result is then passed through
        ``cv2.fastNlMeansDenoising(img, None, 5, 9)``.
        """
        helpers.debug('[preprocessing.py] to_gray_denoise()')
        gray = image.copy()
        if gray.ndim == 3:
            gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

        denoised = cv2.fastNlMeansDenoising(gray, None, 5, 9)
        return np.array(denoised)
Example #5
0
 def get_new_index(pred):
     """Discard the current best prediction and return the next best one.

     The entry of row 0 at ``np.argmax(pred)`` is zeroed in a copy of
     ``pred``; ``pred`` itself is left untouched.

     Returns:
         (new_index, new_pred): the argmax of the updated copy and the
         updated copy itself.
     """
     # assumes pred has shape (1, n_classes) so the flat argmax is a valid
     # column index into row 0 -- TODO confirm
     helpers.debug("[base_grammar.py] correct_grammar_lex() | \
         Reset prediction of current symbol")
     remaining = pred.copy()
     current_best = np.argmax(pred)
     remaining[0][current_best] = 0
     helpers.debug("[base_grammar.py] correct_grammar_lex() | \
         Gets new index and prediction from \
         next index with higher prediction")
     return np.argmax(remaining), remaining
Example #6
0
    def __tree_to_list(self, tree, node=None):
        """Flatten ``tree`` into a list of LaTeX tokens (pre-order walk).

        String node data is appended as is. Any other node data is treated
        as a symbol dict: its ``'label'`` is translated through the
        module-level ``labels`` mapping (literal braces become ``\\{`` /
        ``\\}``), written back into the dict IN PLACE, and the dict itself is
        appended. The children of a ``'RegionNode'`` are wrapped in ``'{'``
        and ``'}'`` tokens.

        When the first token is ``'Expression'`` it is dropped together with
        one trailing ``'}'`` and one leading ``'{'`` if present.

        Args:
            tree: Object exposing ``root_node``; nodes expose ``data``,
                ``node_type`` and ``children``.
            node: Node to start from; a falsy value means ``tree.root_node``.

        Returns:
            list: tokens (strings and symbol dicts). An empty list is
            returned when there is nothing to traverse.
        """
        helpers.debug('[parser.py] __tree_to_list()')
        latex = []

        def recur(root_node):
            current = tree.root_node if not root_node else root_node

            if current is None:
                return

            if isinstance(current.data, str):
                latex.append(current.data)
            else:
                try:
                    real_label = labels[current.data['label']]

                    if real_label == '{':
                        real_label = '\\{'
                    if real_label == '}':
                        real_label = '\\}'

                    current.data['label'] = real_label
                    latex.append(current.data)
                except Exception as e:
                    # Best effort: a symbol whose label cannot be translated
                    # is reported and skipped. Only ``Exception`` is caught
                    # so KeyboardInterrupt/SystemExit still propagate.
                    print('Exception: ', e)

            is_region = current.node_type == 'RegionNode'

            if is_region:
                latex.append('{')

            for child in current.children:
                recur(child)

            if is_region:
                latex.append('}')

        recur(node)

        # Guard every positional access: the traversal may have produced
        # nothing, or only the 'Expression' marker.
        if latex and latex[0] == 'Expression':
            del latex[0]
            if latex and latex[-1] == "}":
                latex.pop()
            if latex and latex[0] == '{':
                del latex[0]

        return latex
Example #7
0
    def __list_to_latex_obj(self, tlist):
        """Normalize a token list into dicts and apply token substitution.

        Every element of ``tlist`` becomes::

            {'label': ..., 'prediction': [...], 'type': '...'}

        Dict symbols keep their label, their prediction (``[]`` when the
        key is absent) and their type (``''`` when falsy). Anything else is
        wrapped as a ``'context'`` entry with an empty prediction. The
        resulting list is then passed through ``__token_substitution``
        together with the grammar table and ``helpers.subst``.
        """
        helpers.debug('[parser.py] __list_to_latex_obj()')

        def as_entry(symbol):
            # Non-dict tokens (e.g. structural markers) carry no prediction.
            if not isinstance(symbol, dict):
                return {'label': symbol, 'prediction': [], 'type': 'context'}
            return {
                'label': symbol['label'],
                'prediction': symbol.get('prediction', []),
                'type': symbol['type'] or '',
            }

        latex = [as_entry(symbol) for symbol in tlist]

        grammar = dict([
            ('-', 'frac'),
            ('below', 'below'),
            ('sqrt', 'sqrt'),
            ('super', 'super'),
            ('*', 'mult'),
            ('subsc', 'subsc'),
            ('neq', 'neq'),
        ])
        subst = helpers.subst

        print('\n\n')
        latex = self.__token_substitution(latex, grammar, subst)
        print('\n\n')

        return latex
Example #8
0
    def check(self, latex_data):
        """Run the lexical check followed by the syntactic (yacc) check.

        Args:
            latex_data: Mapping providing at least ``'latex'``,
                ``'latex_list'`` and ``'latex_string'``.

        Returns:
            The dict produced by ``__check_yacc`` with an extra
            ``'latex_string_original'`` key holding the incoming
            ``latex_string``. It is documented by the original author as:
            {
                'latex': self.latex,
                'latex_list': self.latex_list,
                'latex_string': self.latex_string,
                'yacc_errors_history': self.yacc_errors_history,
                'lex_errors_history': self.lex_errors_history,
                'yacc_pure_errors': self.pure_yacc_errors,
                'lex_pure_errors': self.pure_lex_errors  # not being added here
            }

        Raises:
            GrammarError, SintaticError, LexicalError: re-raised after
                ``'latex_string_original'`` has been added to ``e.data``.
            Any other exception propagates unchanged.
        """
        helpers.debug("[check_grammar.py] check()")
        helpers.debug("[check_grammar.py] latex_data: {0}".format(latex_data))

        latex = latex_data['latex']
        latex_list = latex_data['latex_list']
        latex_string = latex_data['latex_string']

        helpers.debug("[check_grammar.py] check() | Latex List:")
        helpers.debug(latex_list)
        helpers.debug("[check_grammar.py] check() | \
            Latex String: %s " % latex_string)

        try:
            check_lex_data = self.__check_lex(latex_string, latex, latex_list)
            check_yacc_data = self.__check_yacc(check_lex_data)

            check_yacc_data.update({'latex_string_original': latex_string})

            return check_yacc_data

        except (GrammarError, SintaticError, LexicalError) as e:
            # Attach the untouched input so callers can report it; the bare
            # ``raise`` keeps the original traceback intact.
            e.data.update({'latex_string_original': latex_string})
            raise
Example #9
0
    def __start(self, listin, R):
        """Find the start symbol of region ``R``.

        Scans ``listin`` in order for the first symbol that is not yet
        ``'checked'`` and whose ``'centroid'`` (x, y) lies inside ``R``
        (bounds inclusive).

        Args:
            listin: Sequence of symbol dicts with ``'label'``,
                ``'centroid'`` and ``'checked'`` keys.
            R: Region given as ``[[left, top], [right, bottom]]``.

        Returns:
            ``-1`` when no symbol qualifies; otherwise the index returned
            by ``__overlap`` for the symbol that was found.
        """
        helpers.debug('\n[parser.py] __start()')
        print('[parser.py] __start() | region [R]: ', R)

        left, top = R[0][0], R[0][1]
        right, bottom = R[1][0], R[1][1]

        helpers.debug('[parser.py] __start() | region [R]: \
            left: %d, right: %d,\
            top: %d, bottom: %d' % (left, right, top, bottom))

        leftmostIndex = -1

        for listIndex in range(len(listin)):
            symbol = listin[listIndex]

            helpers.debug('[parser.py] __start() | ... \
                symbol index: %d' % listIndex)
            helpers.debug('[parser.py] __start() | ... \
                symbol label: %s' % symbol['label'])
            print(
                '[parser.py]__start() | ... \
                symbol centroid: ', symbol['centroid'])

            if symbol['checked']:
                continue

            centroid = symbol['centroid']
            inside_region = (
                centroid[0] >= left and centroid[1] >= top
                and centroid[0] <= right and centroid[1] <= bottom)

            if inside_region:
                leftmostIndex = listIndex
                break

        helpers.debug('[parser.py] __start() | \
            leftmostIndex: %d' % leftmostIndex)

        if leftmostIndex == -1:
            return leftmostIndex

        return self.__overlap(leftmostIndex, top, bottom, listin)
Example #10
0
    def to_parse(self):
        """Run structural analysis and the grammar check on ``self.symbols``.

        Returns:
            ``None`` when the structural analysis yields a falsy result.
            Otherwise a dict made of the grammar-check result plus
            ``'latex_before_cg'``, ``'tree'`` and ``'tlist'`` taken from the
            structural analysis. It is documented by the original author as:
            {
                'latex': self.latex,
                'latex_list': self.latex_list,
                'latex_string': self.latex_string,
                'yacc_errors_history': self.yacc_errors_history,
                'lex_errors_history': self.lex_errors_history,
                'yacc_pure_errors': self.pure_yacc_errors,
                'lex_pure_errors': self.pure_lex_errors  # not being added here
                'latex_before_cg': latex_before_cg,
                'tree': ...,
                'tlist': ...
            }

        Raises:
            Exception: anything raised along the way is re-raised after
                ``'error'`` has been printed.
        """
        helpers.debug('[parser.py] to_parse()')

        try:
            structured_data = sa.StructuralAnalysis(self.symbols).analyze()
            if not structured_data:
                return None

            latex_data = self.organize_latex_data(structured_data['latex'])
            check_grammar_data = cg.CheckGrammar().check(latex_data)

            data = {}
            data.update(check_grammar_data)
            data['latex_before_cg'] = structured_data['latex']
            data['tree'] = structured_data['tree']
            data['tlist'] = structured_data['tlist']
            return data

        except Exception as e:
            print('error')
            raise e
Example #11
0
    def treatment(self, img):
        """Load (when given a path), normalize and segment an image.

        Args:
            img: Either a path string, which is read with ``cv2.imread``,
                or an already loaded image array.

        Returns:
            The result of ``self.segment`` on the normalized image. On any
            failure the error is printed and an empty list is returned
            (best-effort behaviour kept from the original implementation).

        Side effects:
            Stores a copy of the normalized image in ``self.image``.
        """
        helpers.debug('[preprocessing.py] treatment()')
        try:
            if isinstance(img, str):
                image = cv2.imread(img)
                if image is None:
                    # cv2.imread reports a missing/unreadable file by
                    # returning None instead of raising.
                    raise ValueError('could not read image: %s' % img)
            else:
                image = img

            image = self.resize_full_image(image)
            normalized = self.normalize(image)
            self.image = normalized.copy()

            return self.segment(normalized)
        except Exception as e:
            # Only ``Exception`` is caught so KeyboardInterrupt/SystemExit
            # are not silently turned into an empty result.
            print(e)
            return []
Example #12
0
    def normalize(self, image):
        """Return a grayscale, denoised and optionally adjusted image.

        Steps, driven by ``self.configs``:
          * ``to_gray_denoise`` is always applied (it works on its own copy
            of ``image``, so no extra copy is made here);
          * the image is inverted unless ``configs['black']`` is truthy
            (the key is required);
          * if ``configs['dilate']`` is present and truthy, dilate twice
            with a 2x2 kernel of ones;
          * if ``configs['erode']`` is present and truthy, erode once with
            the same kernel.
        """
        helpers.debug('[preprocessing.py] normalize()')
        img = self.to_gray_denoise(image)

        if not self.configs['black']:
            img = self.invert(img)

        kernel = np.ones((2, 2), np.uint8)

        if 'dilate' in self.configs and self.configs['dilate']:
            img = cv2.dilate(img, kernel, iterations=2)

        if 'erode' in self.configs and self.configs['erode']:
            img = cv2.erode(img, kernel, iterations=1)

        return img
Example #13
0
    def __locate_grammar_error(self, yacc_error_list):
        """Map parser errors onto the symbols of ``self.latex``.

        For every error whose ``'value'`` is not ``None`` the symbols are
        walked in order while the lengths of their labels are accumulated;
        the symbol whose label equals the error value and whose accumulated
        offset equals the error's ``'lexpos'`` is recorded. Errors with a
        ``None`` value are only logged.

        Args:
            yacc_error_list: List of dicts with ``'value'`` and ``'lexpos'``.

        Returns:
            (yacc_errors, yacc_errors_history): the errors located by this
            call, and a copy of ``self.yacc_errors_history`` with each
            located error appended exactly once. Neither the argument nor
            the instance lists are modified.
        """
        helpers.debug("\n[check_grammar_sintax.py] __locate_grammar_error() | \
            Locating all errors and creating a data structure.")

        yacc_error_list = yacc_error_list.copy()
        latex = self.latex.copy()

        yacc_errors = []
        yacc_errors_history = self.yacc_errors_history.copy()

        helpers.debug("[check_grammar_sintax.py] __locate_grammar_error() | \
            Errors: {0}".format(yacc_error_list))

        for error in yacc_error_list:

            if error['value'] is None:
                helpers.debug("Use automata to fix")
                continue

            helpers.debug("[check_grammar_sintax.py] \
                    __locate_grammar_error() | ...for() value not none")

            latex_error_pos = error['lexpos']
            latex_error_token = error['value']

            # Sum of the label lengths of the symbols seen so far; it is
            # compared against the error's 'lexpos'.
            offset = 0

            for pos_list, symbol in enumerate(latex):

                if symbol['label'] == latex_error_token and \
                        offset == latex_error_pos:

                    located = {
                        'pos': latex_error_pos,
                        'pos_list': pos_list,
                        'label': symbol['label'],
                        'prediction': symbol['prediction'],
                        # It adds itself as an attempt of solution
                        'attempts': [symbol['label']]
                    }

                    yacc_errors.append(located)
                    # Append only the new entry: extending with the whole
                    # ``yacc_errors`` list would re-add every previously
                    # located error on each match.
                    yacc_errors_history.append(located)

                    break

                offset += len(symbol['label'])

        return yacc_errors, yacc_errors_history
Example #14
0
    def __find_lexical_errors(self):
        """Run the lexical checker and copy its results onto this instance.

        A fresh ``CheckLex`` receives this instance's ``latex_string``,
        ``latex`` and ``latex_list`` with ``attempts`` reset to 0. Its result
        overwrites those three attributes, sets ``pure_lex_errors`` and
        extends ``lex_errors_history``.
        """
        helpers.debug("\n................FIND LEXICAL ERRORS................")

        lex_checker = check_grammar_lex.CheckLex()
        for attr in ('latex_string', 'latex', 'latex_list'):
            setattr(lex_checker, attr, getattr(self, attr))
        lex_checker.attempts = 0

        # Per the original author's note, reaching the lines below means the
        # lexical errors were solved.
        result = lex_checker.check_correct_lex()

        for attr in ('latex', 'latex_list', 'latex_string'):
            setattr(self, attr, result[attr])
        self.pure_lex_errors = result['pure_errors']
        self.lex_errors_history.extend(result['errors_history'])

        helpers.debug("...................................................")
Example #15
0
    def __attempt_to_fix_error(self, lex_errors):
        """Try one correction of the lexical errors, then re-run the check.

        ``CorrectGrammar.correct_grammar_lex`` is called with the current
        errors and the full error history. Its result updates
        ``lex_error_list``, ``index`` and ``latex_string``; when errors
        remain, ``lex_errors_history`` is replaced by a copy of them. The
        attempt counter is incremented before checking again.

        Returns:
            The result of ``self.check_correct_lex()``.
        """
        helpers.debug("[check_grammar_lex.py] self.__attempt_to_fix_error() \
            | Tries to fix the error.")

        # Per the original author's note, the corrector works on the FIRST
        # error and hands back an updated error list.
        corrector = correct_grammar.CorrectGrammar()
        corrected_data = corrector.correct_grammar_lex(
            lex_errors, self.latex, self.latex_list, 0,
            self.lex_errors_history)

        update_latex_string = corrected_data['latex_string']
        self.lex_error_list = corrected_data['errors']
        self.index = corrected_data['index']

        remaining_errors = self.lex_error_list
        if remaining_errors:
            self.lex_errors_history = remaining_errors.copy()

        helpers.debug("[check_grammar_lex.py] self.__attempt_to_fix_error() | \
            Updated lex error: {0}".format(self.lex_error_list))
        helpers.debug("[check_grammar_lex.py] self.__attempt_to_fix_error() | \
            Updated lex error history: {0}".format(self.lex_errors_history))

        # The string is stored unconditionally, even when it is falsy.
        self.latex_string = update_latex_string
        self.attempts += 1

        return self.check_correct_lex()
Example #16
0
                def recur_get_new_index(pred):
                    """Return the next candidate that has not been tried yet.

                    Takes the next-best prediction through ``get_new_index``,
                    resolves it to a label via ``helpers_labels`` and
                    ``labels``, and recurses while that label is in the
                    current error's ``'attempts'`` or in
                    ``previous_attemptions``.

                    Returns:
                        (new_index, pred, new_identification), with literal
                        braces escaped as ``\\{`` / ``\\}``.
                    """
                    new_index, pred = get_new_index(pred)
                    recognition_key = helpers_labels[json_label][
                        str(new_index)]
                    new_label = helpers_labels["labels_recognition"][
                        recognition_key]
                    candidate = labels[new_label]

                    already_tried = (
                        candidate in errors[index]['attempts']
                        or candidate in previous_attemptions)

                    if not already_tried:
                        if candidate == '{':
                            candidate = '\\{'
                        elif candidate == '}':
                            candidate = '\\}'
                        return new_index, pred, candidate

                    helpers.debug("[base_grammar.py] \
                            correct_grammar_lex() | \
                            New index is in previous attempts. Getting next.")

                    return recur_get_new_index(pred)
Example #17
0
        def __change_label(i, label, substitution_list):
            """Try every substitution rule starting at position ``i``.

            Each rule is run through ``__list_substitution``, which returns
            the updated position and a no-match flag and fills a list of
            matches. When a rule matches, the matched labels are written
            into ``latex`` at the recorded indices.

            Returns:
                The position returned by the last rule that was tried (or
                ``i`` unchanged when ``substitution_list`` is empty).
            """
            for substitution_index, substitution in enumerate(
                    substitution_list):
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...substitutions: ')
                helpers.debug(substitution)

                matches = []
                # ``i`` is passed both as the running position and as the
                # initial index of this attempt.
                i, nomatch = __list_substitution(i, False, matches,
                                                 i, label,
                                                 substitution_list,
                                                 substitution_index)

                if nomatch:
                    continue

                helpers.debug('[parser.py] __list_to_latex_obj() \
                        | match - updating value ')
                for matched in matches:
                    latex[matched['index']]['label'] = matched['label']

            return i
Example #18
0
    def __attempt_to_fix_error(self, yacc_errors):
        """Try one correction of the yacc errors, then re-run the check.

        Per the original author's note, the corrector tries to solve the
        FIRST error and returns an updated list of errors. Previous lexical
        and syntactic attempts are joined and handed to
        ``CorrectGrammar.correct_grammar_lex`` so they are taken into
        account. The result updates ``yacc_error_list``, ``index`` and
        ``latex_string``; when errors remain, ``yacc_errors_history`` is
        replaced by a copy of them.

        Returns:
            The result of ``self.check_correct_grammar()``.
        """
        helpers.debug('[check_grammar_sintax.py] __attempt_to_fix_error()')

        corrector = correct_grammar.CorrectGrammar()

        # Lex attempts first, then yacc attempts, in a brand new list.
        fix_attempts = [*self.lex_errors_history, *self.yacc_errors_history]

        corrected_data = corrector.correct_grammar_lex(
            yacc_errors, self.latex, self.latex_list, 0, fix_attempts)

        updated_latex_string = corrected_data['latex_string']
        self.yacc_error_list = corrected_data['errors']
        self.index = corrected_data['index']

        remaining_errors = self.yacc_error_list
        if remaining_errors:
            self.yacc_errors_history = remaining_errors.copy()

        helpers.debug("[check_grammar_yacc.py] \
            self.__attempt_to_fix_error() | \
            Updated yacc error: {0}".format(self.yacc_error_list))
        helpers.debug("[check_grammar_yacc.py] \
            self.__attempt_to_fix_error() | \
            Updated yacc error history: {0}".format(self.yacc_errors_history))

        # The string is stored unconditionally, even when it is falsy.
        self.latex_string = updated_latex_string
        self.attempts += 1

        return self.check_correct_grammar()
Example #19
0
    def __main_parsing(self, symbols):
        """Build the layout tree of ``symbols`` by alternating two phases.

        Working structures:
          * ``T`` (DS.Tree): the tree being built and finally returned.
          * ``Q`` (DS.Queue): holds pairs enqueued as (symbol index,
            parent node) -- the start symbols of baselines still to follow.
          * ``S`` (DS.Stack): holds pairs pushed as (symbol index,
            SymbolNode) for every symbol placed on a baseline, plus an
            "EOBL" marker pushed after each phase 1.

        Phase 1 ("find main baseline"): for each queued start symbol, insert
        it under its parent node and follow ``__hor`` to collect the
        following symbols, marking them checked, copying the ``'wall'`` of
        the previous symbol and setting the previous symbol's
        ``wall['right']`` to the next symbol's ``xmin``.

        Phase 2 ("find secondary baseline"): for every stacked symbol,
        compute the 'above'/'below' and 'contains'/'super'/'subsc' regions
        and ask ``__start`` for a start symbol in each eligible region; each
        new start symbol is enqueued under a fresh ``DS.RegionNode``.

        The outer loop repeats until the queue stays empty.

        Args:
            symbols: List of symbol dicts. The code reads ``'label'``,
                ``'checked'``, ``'wall'``, ``'xmin'``, ``'xmax'``,
                ``'ymin'``, ``'ymax'``, ``'h'`` and ``'w'`` and mutates
                ``'checked'`` and ``'wall'`` in place.

        Returns:
            The tree ``T``, or ``None`` when no start symbol is found.
        """
        helpers.debug('\n[parser.py] __main_parsing()')
        listin = symbols
        T = DS.Tree()
        Q = DS.Queue()
        S = DS.Stack()

        temp1 = 0
        temp2 = 0

        # The initial search region covers (practically) the whole image.
        R = [[0, 0], [9999999999, 9999999999]]
        sstart = self.__sp(listin, R)

        if sstart == -1:
            return

        helpers.debug('\n[parser.py] __main_parsing() | \
            STARTING symbol index: %d ' % sstart)
        helpers.debug('[parser.py] __main_parsing() | \
            STARTING symbol label: %s ' % listin[sstart]['label'])

        # NOTE(review): `s` is not used anywhere below in this method.
        s = listin[sstart]

        Q.enqueue(sstart)
        Q.enqueue(T.root_node)
        listin[sstart]['checked'] = True

        while not Q.is_empty():
            '''
            abc^{2-1}
            =============     ->    | EOBL  |
                        a            | c  |
            =============           | b |
                                    | a |
            '''
            '''
            abc^{2-1]
            =============     ->    |   |
                        2           |   |
            =============           | - |
                                    | 2 |
            '''

            helpers.debug('\n[parser.py] __main_parsing() \
                | find main baseline')

            # Phase 1: drain the queue, following each start symbol
            # horizontally and stacking every symbol that is placed.
            while not Q.is_empty():
                temp1 = Q.dequeue()  # a, 2
                ParentNode = Q.dequeue()
                SymbolNode = DS.SymbolNode(listin[temp1])
                T.insert(SymbolNode, ParentNode, 'Node')
                S.push(temp1)  # a, 2
                S.push(SymbolNode)

                print(
                    '\n[parser.py] __main_parsing() | \
                    find baseline of symbol: ', temp1, listin[temp1]['label'])

                helpers.debug('\n[parser.py] __main_parsing() | \
                    temp2 hor...')
                temp2 = self.__hor(listin, temp1)  # b, -
                # NOTE(review): when temp2 is -1 (no neighbour) this print
                # indexes listin[-1]; for a list that is the LAST symbol, so
                # the printed label is misleading -- confirm and guard.
                print(
                    '\n[parser.py] __main_parsing() | \
                    temp2: ', temp2, listin[temp2]['label'])

                while temp2 != -1:
                    print('[parser.py] __main_parsing() | \
                        ... while temp2')
                    listin[temp2]['checked'] = True
                    print(
                        '[parser.py] __main_parsing() | \
                        ... wall attributes of temp1: ', listin[temp1]['wall'])
                    # The neighbour starts with a copy of the walls of the
                    # symbol it follows.
                    listin[temp2]['wall'] = listin[temp1]['wall'].copy()
                    '''
                    a.wall = -1 -1 9999 9999

                    b.checked = true
                    b.wall = a.wall (-1 -1 9999 9999)

                    c.checked = true
                    c.wall = b.wall
                    ---------------------------------------------
                    -.checked = true
                    -.wall = 2.wall (wall da região super?)

                    '''

                    print(
                        '[parser.py] __main_parsing() | \
                        ...wall attributes of temp2: ', listin[temp2]['wall'])

                    # The neighbour hangs from the same parent as the symbol
                    # that started this baseline.
                    SymbolNode = DS.SymbolNode(listin[temp2])
                    T.insert(SymbolNode, ParentNode, 'Node')
                    S.push(temp2)
                    S.push(SymbolNode)
                    # The previous symbol's right wall ends where the
                    # neighbour begins.
                    listin[temp1]['wall']['right'] = listin[temp2]['xmin']
                    '''
                    a.wall.right = b.xmin
                    b.wall.right = c.xmin
                    '''

                    print(
                        '[parser.py] __main_parsing() | \
                        ...updated wall attributes \
                        of temp1: ', listin[temp1]['wall'])

                    temp1 = temp2  # b
                    temp2 = self.__hor(listin, temp1)  # c - 1
                    print(
                        '[parser.py] __main_parsing() | \
                        new temp2: ', temp2)

            # End-of-baseline marker; it is discarded again at the start of
            # phase 2.
            S.push("EOBL")

            helpers.debug('\n[parser.py] __main_parsing() \
                | find secondary baseline')
            '''
            abc^2
            =============     ->    | EOBL  |
                        2           | c  |
            =============           | b |
                                    | a |
            '''
            # Phase 2: pop every stacked symbol and look for start symbols
            # in the regions around it.
            while not S.is_empty():
                if S.peek() == "EOBL":
                    S.pop()
                # Popped in reverse push order: node first, then its index.
                SymbolNode = S.pop()
                temp1 = S.pop()  # c

                helpers.debug('[parser.py] __main_parsing() \
                    | symbol: %s ' % temp1)

                label = int(listin[temp1]['label'])
                helpers.debug('[parser.py] __main_parsing() \
                    | temp1 label: %s' % label)

                # Roughly 1/6 of the height below ymin (factor is 1/6.5)
                upperThreshold = listin[temp1]['ymin'] + \
                    ((1/6.5) * listin[temp1]['h'])
                # Roughly 5/6 of the height below ymin (factor is 5.5/6.5)
                lowerThreshold = listin[temp1]['ymin'] + \
                    ((5.5/6.5) * listin[temp1]['h'])
                '''
                    Changes in xmin and xmax because of the 'a'
                    When it overlaps the fraction
                '''
                # Horizontal limits are shrunk by 1/6 of the width on each
                # side, except for label 10 which keeps its full extent.
                leftThreshold = (
                        listin[temp1]['xmin'] + ((1/6) * listin[temp1]['w'])
                    ) \
                    if label != 10 else listin[temp1]['xmin']

                rightThreshold = (
                    listin[temp1]['xmax'] - ((1/6) * listin[temp1]['w'])
                    ) \
                    if label != 10 else listin[temp1]['xmax']

                # Each region is [[left, top], [right, bottom]].
                R = [{
                    'above': [[leftThreshold, listin[temp1]['wall']['top']],
                              [rightThreshold, upperThreshold]]
                }, {
                    'below':
                    [[leftThreshold, lowerThreshold],
                     [rightThreshold, listin[temp1]['wall']['bottom']]]
                }]

                for region in R:

                    # For each region, it looks for the initial symbol
                    reg = region[list(region.keys())[0]]
                    region_name = list(region.keys())[0]
                    helpers.debug('\n[parser.py] __main_parsing() | \
                        região: %s' % region_name)

                    # ( ) [ ] { } . * = neq + sqrt
                    operators = bool(label in range(11, 17)
                                     or label in range(27, 31) or label == 17
                                     or label == 23)

                    # NOTE(review): region_name is always 'above' or 'below'
                    # here, so this condition reduces to `not operators`.
                    if (region_name == 'above' and not operators) or \
                    (region_name == 'below' and not operators):

                        temp2 = self.__start(listin, reg)

                        if temp2 != -1:
                            if not listin[temp2]['checked']:
                                # The start symbol of the region is walled in
                                # by the region's own bounds.
                                listin[temp2]['checked'] = True
                                listin[temp2]['wall']['left'] = reg[0][0]
                                listin[temp2]['wall']['right'] = reg[1][0]
                                listin[temp2]['wall']['top'] = reg[0][1]
                                listin[temp2]['wall']['bottom'] = reg[1][1]

                                RelationNode = DS.RegionNode(
                                    list(region.keys())[0])
                                T.insert(RelationNode, SymbolNode, 'Node')
                                Q.enqueue(temp2)
                                Q.enqueue(RelationNode)
                '''
                    Changes in xmin and xmax because of the 'a'
                    When it overlaps the fraction
                '''
                # Second group of regions: inside the symbol's bounding box,
                # and to its upper right / lower right up to its right wall.
                R = [
                    {
                        'contains': [
                            # left, top
                            [listin[temp1]['xmin'], listin[temp1]['ymin']],
                            # right, bottom
                            [listin[temp1]['xmax'], listin[temp1]['ymax']]
                        ]
                    },
                    {
                        'super': [
                            # left, top
                            [rightThreshold, listin[temp1]['wall']['top']],
                            # right, bottom
                            [listin[temp1]['wall']['right'], upperThreshold]
                        ]
                    },
                    {
                        'subsc': [
                            # left, top
                            [rightThreshold, lowerThreshold],
                            # right, bottom
                            [
                                listin[temp1]['wall']['right'],
                                listin[temp1]['wall']['bottom']
                            ]
                        ]
                    }
                ]

                for region in R:
                    # For each region, it looks for the initial symbol
                    reg = region[list(region.keys())[0]]
                    region_name = list(region.keys())[0]
                    helpers.debug('\n[parser.py] __main_parsing() | \
                        região: %s' % region_name)

                    # - ( ) [ ] { } . * = neq +
                    operators = bool(label == 10 or label in range(27, 31)
                                     or label == 17)

                    # 'contains' is only searched for label 23 (per the
                    # operator comment above, presumably sqrt -- confirm).
                    if (region_name == 'super' and not operators) or \
                    (region_name == 'subsc' and not operators) or \
                    (region_name == 'contains' and \
                    int(listin[temp1]['label']) == 23):

                        temp2 = self.__start(listin, reg)

                        if temp2 != -1:
                            if not listin[temp2]['checked']:
                                listin[temp2]['checked'] = True
                                listin[temp2]['wall']['left'] = reg[0][0]
                                listin[temp2]['wall']['right'] = reg[1][0]
                                listin[temp2]['wall']['top'] = reg[0][1]
                                listin[temp2]['wall']['bottom'] = reg[1][1]

                                RelationNode = DS.RegionNode(
                                    list(region.keys())[0])
                                T.insert(RelationNode, SymbolNode, 'Node')
                                Q.enqueue(temp2)
                                Q.enqueue(RelationNode)

        return T
Example #20
0
    def __overlap(self, symbolIndex, top, bottom, listin):
        """Pick the widest label-'10' symbol overlapping ``symbolIndex``.

        Starting from the given symbol, nearby symbols are scanned for an
        unchecked symbol labelled ``'10'`` whose centroid y lies within
        ``[top, bottom]``, whose ``xmin`` is at most 8 units to the right of
        the given symbol's ``xmin``, which shares more than half of the
        shorter of the two x-spans with it, and which is wider than the best
        candidate so far.

        Args:
            symbolIndex: Index in ``listin`` of the symbol found so far.
            top: Upper y bound of the region being searched.
            bottom: Lower y bound of the region being searched.
            listin: List of symbol dicts; reads ``'label'``, ``'xmin'``,
                ``'xmax'``, ``'centroid'`` and ``'checked'``.

        Returns:
            The index of the selected label-'10' symbol, or ``symbolIndex``
            itself when none qualifies.
        """
        # NOTE(review): label '10' looks like the horizontal line / fraction
        # bar -- confirm against the label table.
        helpers.debug('\n\n[parser.py] __overlap()')
        listIndex = symbolIndex
        stop = False
        n = len(listin)

        helpers.debug('[parser.py] __overlap() | listIndex: %d ' % listIndex)

        # If the symbol itself has label '10', its width is the width any
        # other candidate has to beat; otherwise any width qualifies.
        if listin[symbolIndex]['label'] == '10':
            maxLength = listin[symbolIndex]['xmax'] - listin[symbolIndex][
                'xmin']
        else:
            maxLength = -1
        mainLine = -1

        helpers.debug('[parser.py] __overlap() | \
            mainLine: %d ' % mainLine)
        helpers.debug('[parser.py] __overlap() | \
            maxLength: %d ' % maxLength)

        # Walk backwards while the previous symbol starts at or before the
        # given symbol's xmin (presumably listin is ordered by xmin --
        # TODO confirm).
        while listIndex > 0 and stop == False:

            print('[parser.py] __overlap() | xmin, xmin',
                  listin[listIndex - 1]['xmin'], listin[symbolIndex]['xmin'])

            if listin[listIndex - 1]['xmin'] <= listin[symbolIndex]['xmin']:
                listIndex = listIndex - 1  # stop = True
            else:
                stop = True  # listIndex = listIndex - 1

        helpers.debug('[parser.py] __overlap() | \
            listIndex: %d ' % listIndex)
        helpers.debug('[parser.py] __overlap() | \
            n: %d ' % n)
        helpers.debug('[parser.py] __overlap() | \
            top: %d ' % top)
        helpers.debug('[parser.py] __overlap() | \
            bottom: %d ' % bottom)

        # Integer x positions covered by the given symbol; range() requires
        # xmin/xmax to be integers.
        line1x = range(listin[symbolIndex]['xmin'],
                       listin[symbolIndex]['xmax'] + 1)

        len_line1x = len(line1x)

        # Scan forward while the candidate starts before the given symbol
        # ends.
        while listIndex < n and \
        listin[listIndex]['xmin'] < listin[symbolIndex]['xmax']:

            line2x = range(listin[listIndex]['xmin'],
                           listin[listIndex]['xmax'] + 1)
            len_line2x = len(line2x)
            # Build the set from the shorter span and intersect it with the
            # longer one: x positions shared by both symbols.
            x_set = set(line1x) if len_line1x < len_line2x else set(line2x)
            x_intersection = x_set.intersection(
                line1x if len_line1x >= len_line2x else line2x)
            min_line = min(len_line1x, len_line2x)

            print(
                '\n[parser.py] __overlap() | ... \
                listIndex: ', listIndex)
            print('[parser.py] __overlap() | ... \
                label: ', listin[listIndex]['label'])
            print('[parser.py] __overlap() | ... \
                centroid: ', listin[listIndex]['centroid'])
            print('[parser.py] __overlap() | ... \
                xmin: ', listin[listIndex]['xmin'])
            print('[parser.py] __overlap() | ... \
                xmax: ', listin[listIndex]['xmax'])
            print(
                '[parser.py] __overlap() | ... \
                max length: ',
                (listin[listIndex]['xmax'] - listin[listIndex]['xmin']))
            print(
                '[parser.py] __overlap() | ... \
                len(x_intersection): ', len(x_intersection))
            print(
                '[parser.py] __overlap() | ... \
                min_line/2: ', min_line / 2)

            # Candidate must be: unchecked, labelled '10', vertically inside
            # [top, bottom], starting no more than 8 units right of the given
            # symbol, overlapping more than half of the shorter span, and
            # strictly wider than the best width so far.
            if not listin[listIndex]['checked'] and \
                listin[listIndex]['label'] == '10' and \
                listin[listIndex]['centroid'][1] >= top and \
                listin[listIndex]['centroid'][1] <= bottom and \
                listin[listIndex]['xmin'] <= (listin[symbolIndex]['xmin'] + 8) and \
                len(x_intersection) > (min_line/2) and \
                (listin[listIndex]['xmax'] - listin[listIndex]['xmin']) > maxLength:
                maxLength = (listin[listIndex]['xmax'] -
                             listin[listIndex]['xmin'])
                mainLine = listIndex

            listIndex += 1

        helpers.debug('[parser.py] __overlap() | listIndex: %d ' % listIndex)
        helpers.debug('[parser.py] __overlap() | mainLine: %d ' % mainLine)
        helpers.debug('[parser.py] __overlap() | maxLength: %d ' % maxLength)

        # Fall back to the given symbol when no candidate was selected.
        if mainLine == -1:
            return symbolIndex
        else:
            return mainLine
0
 def invert(self, image):
     """Return ``255 - image`` computed on a copy of the input.

     NOTE(review): presumably used on 8-bit images so that 255 is the
     maximum value - confirm against callers.
     """
     helpers.debug('[preprocessing.py] invert()')
     return 255 - image.copy()
Example #22
0
 def __sp(self, listin, R):
     """Log the call and delegate to ``self.__start`` with the same arguments.

     Returns whatever ``self.__start(listin, R)`` returns.
     """
     helpers.debug('\n[parser.py] __sp()')
     outcome = self.__start(listin, R)
     return outcome
Example #23
0
    def __hor(self, listin, index):
        """Look for the symbol that follows ``listin[index]`` horizontally.

        A search rectangle is derived from the current symbol: horizontally
        from its ``xmin`` (or ``xmax`` for label 23) up to its right wall,
        vertically from 1/6.5 to 5.5/6.5 of its height (or the full wall
        height for labels 10 and 27-30). For labels 10-16 the rectangle is
        handed to ``self.__start``; for every other label the first unchecked
        symbol whose centroid lies inside the rectangle is taken.

        Side effect: the module-level globals ``stop`` and ``a`` are
        overwritten on every call.

        Args:
            listin (list): symbol dicts (``label``, ``xmin``, ``xmax``,
                ``ymin``, ``h``, ``centroid``, ``checked``, ``wall``).
            index (int): position of the current symbol in ``listin``.

        Returns:
            ``-1`` when no candidate was found, otherwise the result of
            ``self.__overlap`` for the candidate.
        """
        global stop, a

        print('\n[parser.py] __hor()')
        print('[parser.py] __hor() | symbol index: ', index)
        print('[parser.py] __hor() | symbol label: ', listin[index]['label'])

        stop = False
        a = -1

        current = listin[index]
        label = int(current['label'])

        right = current['wall']['right']

        # Lower x bound, so that symbols behind the current one are ignored.
        left = current['xmin']

        # Vertical band used to leave out exponent and subscript zones.
        top = current['ymin'] + (current['h'] * (1 / 6.5))
        bottom = current['ymin'] + (current['h'] * (5.5 / 6.5))

        # These labels take no exponent or subscript: use the wall limits.
        if label in (10, 27, 28, 29, 30):
            top = current['wall']['top']
            bottom = current['wall']['bottom']

        # Square root: the left limit is the symbol's xmax.
        if label == 23:
            left = current['xmax']

        if 10 <= label <= 16:
            # Horizontal line or brackets: restart the search in the region
            # to the right of the symbol.
            R = [[current['xmax'], top], [right, bottom]]
            print('[parser.py] __hor() | R', R)
            a = self.__start(listin, R)
            stop = True

        else:

            helpers.debug('[parser.py] __hor() | top: %d, bottom: %d, \
                left: %d, right: %d' % (top, bottom, left, right))

            for s, symbol in enumerate(listin):

                if not symbol['checked']:
                    cx, cy = symbol['centroid'][0], symbol['centroid'][1]

                    helpers.debug('[parser.py] __hor() | ... \
                        symbol: %s' % symbol['label'])

                    helpers.debug('[parser.py] __hor() | ... \
                        symbol centroid: %s ' % symbol['centroid'])

                    helpers.debug('[parser.py] __hor() | ... \
                        symbol coordinates: xmin: %s xmax: %s \
                        ymin: %s ymax: %s' % (symbol['xmin'], symbol['xmax'],
                                              symbol['ymin'], symbol['ymax']))

                    if left <= cx <= right and top <= cy <= bottom:
                        helpers.debug('[parser.py] __hor() | \
                                ......... founded: %s' % s)
                        a = s
                        stop = True
                        break

                    helpers.debug('[parser.py] __hor() | a: %d ' % a)

        if a != -1:
            helpers.debug('[parser.py] __hor() | \
                a label: %s ' % listin[a]['label'])

        if not (stop and a != -1):
            return -1

        helpers.debug('[parser.py] __hor() | ... before overlap')
        found_wall = listin[a]['wall']
        return self.__overlap(a, found_wall['top'], found_wall['bottom'],
                              listin)
Example #24
0
    def segment(self, img):
        """Extract one normalized sub-image per external contour of ``img``.

        Every contour with an area of at least 10 is masked out of a copy of
        the input, cropped to its bounding box (plus one pixel), passed
        through ``self.resize``, ``self.binarization`` and
        ``self._255_to_1``, and recorded with its bounding-box geometry.
        When ``self.configs['dataset']`` is truthy and more than one contour
        was found, every contour is skipped. A contour whose processing
        raises an ordinary exception is reported with ``print`` and skipped.

        Args:
            img: image array to segment; it is copied, never modified.
                NOTE(review): presumably a single-channel image, as
                ``cv2.findContours`` expects - confirm against callers.

        Returns:
            tuple: ``(symbols, image)``. ``symbols`` is a list of dicts with
            the keys ``index``, ``image``, ``xmin``, ``xmax``, ``ymin``,
            ``ymax``, ``w``, ``h`` and ``centroid``; ``image`` is the working
            copy of ``img``, which is also stored in ``self.image``.
        """
        helpers.debug('[preprocessing.py] segment()')
        image = img.copy()

        # Publish the working copy up front so the returned image always
        # belongs to this call, even when no contour survives the filters
        # (previously a stale or missing ``self.image`` could be returned).
        self.image = image

        symbols = []

        # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
        # (image, contours, hierarchy) in 3.x; the contours are always the
        # second-to-last element.
        cnts = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

        helpers.debug('[preprocessing.py] segment() | contours founded:')
        helpers.debug(len(cnts))

        for i, contour in enumerate(cnts):
            # It was 0 and was changed to 10 to try reducing the noise
            if cv2.contourArea(contour) < 10:
                continue

            if self.configs['dataset'] and len(cnts) > 1:
                continue

            try:
                # Draw the filled contour in a new image (mask); a negative
                # thickness makes drawContours fill the contour interior.
                mask = np.zeros_like(image)
                cv2.drawContours(mask, cnts, i, (255, 255, 255), -50)

                # Copy into ``out`` only the pixels covered by the mask.
                # Comparing with > 0 instead of == 255 avoids having to
                # binarize the image first.
                out = np.zeros_like(image)
                out[mask > 0] = image[mask > 0]

                # Bounding box coordinates
                _x, _y, _w, _h = cv2.boundingRect(contour)

                # Crop the image
                ycrop = _y + _h + 1
                xcrop = _x + _w + 1
                cropped = out[_y:ycrop, _x:xcrop]

                resized = self.resize(cropped)

                # Test - It was not here during validation
                binarized = self.binarization(resized)
                result_image = self._255_to_1(binarized)

                helpers.show_image(result_image)

                symbols.append({
                    'index': i,
                    'image': result_image.copy(),
                    'xmin': _x,
                    'xmax': _x + _w,
                    'ymin': _y,
                    'ymax': _y + _h,
                    'w': _w,
                    'h': _h,
                    'centroid': [(_x + (_x + _w)) / 2, (_y + (_y + _h)) / 2]
                })

            except Exception as e:
                # Best effort: report and skip this contour. Only ordinary
                # errors are caught, so KeyboardInterrupt / SystemExit still
                # propagate.
                print(e)
                continue

        return (symbols, self.image)
Example #25
0
    def __token_substitution(self, latex, grammar, subst):
        """Rewrite token labels in ``latex`` using sequence substitution rules.

        For every position whose label maps, through ``grammar``, to a key of
        ``subst``, each rule registered under that key is tried in order. A
        rule is a mapping whose keys, in iteration order, are the labels to
        match starting at the current position and whose values are the
        replacement labels. Labels are rewritten only when the whole key
        sequence matches.

        Args:
            latex (list): token dicts with a ``'label'`` entry; modified in
                place.
            grammar (dict): maps a token label to a grammar category.
            subst (dict): maps a grammar category to a list of rules.

        Returns:
            list: the same ``latex`` list after the substitutions.
        """
        def __list_substitution(i, nomatch, aux, initial_index, label,
                                substitution_list, substitution_index):
            """Match one rule against ``latex`` starting at index ``i``.

            Matched positions and their replacement labels are appended to
            ``aux``. Returns ``(i, nomatch)``: on success ``i`` points just
            past the matched tokens; on any failure ``i`` is rewound to
            ``initial_index`` and ``nomatch`` is True. ``label`` is unused
            and kept only so the signature stays as it was.
            """
            for substitution in substitution_list[substitution_index]:
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ......substitution: ')  # subsc
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ......latex index: %d ' % i)  # subsc
                helpers.debug(substitution)  # subsc

                try:

                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ......current latex: %s ' % latex[i]['label'])

                    if latex[i]['label'] == substitution:

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                            ......match ')

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                                    ......from: %s %s ' %
                                      (latex[i]['label'], substitution))

                        helpers.debug(
                            '[parser.py] __list_to_latex_obj() | \
                                    ......to: %s ' %
                            substitution_list[substitution_index][substitution]
                        )

                        aux.append({
                            "index":
                            i,
                            "label":
                            substitution_list[substitution_index][substitution]
                        })

                        i += 1

                    else:

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                            ......no match ')

                        # ``i`` is rewound to ``initial_index`` below.
                        nomatch = True

                except IndexError:
                    # The rule runs past the end of ``latex``. Rewind exactly
                    # like the mismatch path so the next rule is tried from
                    # the same starting position instead of a stale index.
                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ......no match: IndexError ')
                    nomatch = True
                    i = initial_index
                    break

                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...... no match value: %s ' % nomatch)
                if nomatch:
                    i = initial_index
                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ...... continue ......')
                    break
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...... next ......')
            return i, nomatch

        def __change_label(i, label, substitution_list):
            """Try every rule in ``substitution_list`` from index ``i``.

            A rule that matches completely has its replacements written to
            ``latex``, and the following rules continue from the index just
            past that match. Returns the final index.
            """
            for substitution_index in range(0, len(substitution_list)):
                nomatch = False
                aux = []

                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...substitutions: ')
                helpers.debug(substitution_list[substitution_index])

                initial_index = i

                i, nomatch = __list_substitution(i, nomatch, aux,
                                                 initial_index, label,
                                                 substitution_list,
                                                 substitution_index)

                if not nomatch:
                    helpers.debug('[parser.py] __list_to_latex_obj() \
                        | match - updating value ')
                    for matched in aux:
                        latex[matched['index']]['label'] = matched['label']
            return i

        for i, token in enumerate(latex):
            if token['label'] in grammar:
                label = grammar[token['label']]

                if label in subst:
                    substitution_list = subst[label]  # list of substitutions
                    helpers.debug('[parser.py] __list_to_latex_obj() \
                        | substitution_list: ')
                    helpers.debug(substitution_list)

                    # The returned index is deliberately not assigned back:
                    # rebinding the loop variable never changed which
                    # position is visited next, so every position is visited.
                    __change_label(i, label, substitution_list)

        return latex
Example #26
0
    def __preprocessing(self, symbols):
        """Sort symbols by ``xmin`` and attach the parser's bookkeeping fields.

        Each symbol dict is updated in place with: ``centroid`` converted to
        a list, ``checked`` set to False, ``type`` ('Open', 'Close' or
        'Normal'), ``centroid_class`` together with a re-computed vertical
        centroid, and a ``wall`` dict holding default limits.

        Args:
            symbols (list): symbol dicts providing at least ``xmin``,
                ``ymin``, ``h``, ``label`` and ``centroid``.

        Returns:
            list: a new list with the same dicts, ordered by ``xmin``.
        """
        helpers.debug('[parser.py] preprocessing()')
        ordered = sorted(symbols, key=lambda item: item['xmin'])

        for sym in ordered:
            sym['centroid'] = list(sym['centroid'])
            sym['checked'] = False

            tag = sym['label']
            if tag in ('11', '13', '15'):
                kind = 'Open'
            elif tag in ('12', '14', '16'):
                kind = 'Close'
            else:
                kind = 'Normal'
            sym['type'] = kind

            # The geometric centre was too low or too high for some symbols,
            # so the vertical centroid is placed at 2/3, 1/3 or 1/2 of the
            # height depending on the symbol class.
            #
            # 2/3 group - until validation: [0-9], b
            #             after validation: (), {}, [], sqrt
            # NOTE(review): '19' and '23' presumably stand for "b" and
            # "sqrt" - confirm against the label table.
            single_digit = re.search("^[0-9]$", str(tag))
            if single_digit or tag in ('19', '23') or \
                    kind in ('Open', 'Close'):
                sym['centroid_class'] = 'Ascending'
                sym['centroid'][1] = sym['ymin'] + (2 / 3) * (sym['h'])  # 3/5

            elif tag == '25':
                # 1/3 group - until validation: y sqrt ( { [
                #             after validation: y sqrt
                sym['centroid_class'] = 'Descending'
                sym['centroid'][1] = sym['ymin'] + (1 / 3) * sym['h']

            else:
                sym['centroid_class'] = 'Centred'
                sym['centroid'][1] = sym['ymin'] + ((sym['h']) / 2)

            # Default limits: -1 for top/left, a very large number for
            # bottom/right.
            sym['wall'] = {
                'top': -1,
                'bottom': 9999999999999,
                'left': -1,
                'right': 9999999999999,
            }

        return ordered
Example #27
0
 def _255_to_1(self, image):
     """Return a copy of ``image`` with every value divided by 255."""
     helpers.debug('[preprocessing.py] _255_to_1()')
     scaled = image.copy() / 255
     return scaled
Example #28
0
        def __list_substitution(i, nomatch, aux, initial_index, label,
                                substitution_list, substitution_index):
            """Match one substitution rule against ``latex`` from index ``i``.

            The keys of ``substitution_list[substitution_index]`` are
            compared, in iteration order, with the labels of consecutive
            ``latex`` tokens (``latex`` comes from the enclosing scope).
            Every matched position is appended to ``aux`` together with its
            replacement label.

            Args:
                i (int): index of the first token to compare.
                nomatch (bool): initial failure flag.
                aux (list): receives ``{"index", "label"}`` dicts for matches.
                initial_index (int): index restored when the rule fails.
                label: unused; kept so the signature stays as it was.
                substitution_list (list): rules for the current category.
                substitution_index (int): position of the rule to try.

            Returns:
                tuple: ``(i, nomatch)``. On success ``i`` points just past
                the matched tokens; on any failure ``i`` equals
                ``initial_index`` and ``nomatch`` is True.
            """
            for substitution in substitution_list[substitution_index]:
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ......substitution: ')  # subsc
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ......latex index: %d ' % i)  # subsc
                helpers.debug(substitution)  # subsc

                try:

                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ......current latex: %s ' % latex[i]['label'])

                    if latex[i]['label'] == substitution:

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                            ......match ')

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                                    ......from: %s %s ' %
                                      (latex[i]['label'], substitution))

                        helpers.debug(
                            '[parser.py] __list_to_latex_obj() | \
                                    ......to: %s ' %
                            substitution_list[substitution_index][substitution]
                        )

                        aux.append({
                            "index":
                            i,
                            "label":
                            substitution_list[substitution_index][substitution]
                        })

                        i += 1

                    else:

                        helpers.debug('[parser.py] __list_to_latex_obj() | \
                            ......no match ')

                        # ``i`` is rewound to ``initial_index`` below.
                        nomatch = True

                except IndexError:
                    # The rule runs past the end of ``latex``. Rewind exactly
                    # like the mismatch path so the caller does not continue
                    # from a stale, advanced index.
                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ......no match: IndexError ')
                    nomatch = True
                    i = initial_index
                    break

                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...... no match value: %s ' % nomatch)
                if nomatch:
                    i = initial_index
                    helpers.debug('[parser.py] __list_to_latex_obj() | \
                        ...... continue ......')
                    break
                helpers.debug('[parser.py] __list_to_latex_obj() | \
                    ...... next ......')
            return i, nomatch
Example #29
0
    def resize(self, image):
        """Scale a cropped symbol so it fits in 26 px and pad it to 28x28.

        The target size keeps the aspect ratio with the longest side at
        26 px. Two shapes get special treatment:

        * short and wide crops with a non-zero pixel around the centre are
          treated as horizontal lines: the width becomes 26 and the height
          is clamped to the range 4..10;
        * other crops at least twice as wide as tall (square roots, per the
          original comment) are dilated and cut to 5%..85% of their width
          before scaling.

        The scaling itself only happens when ``self.configs['resize']`` is
        ``'smaller'`` (INTER_AREA) or ``'bigger'`` (INTER_LINEAR). A black
        border is then added to reach 28 px in both directions.

        Args:
            image: 2-D image array of the cropped symbol.
                NOTE(review): presumably single-channel, since individual
                pixels are compared with 0 - confirm against callers.

        Returns:
            The scaled and padded image.
        """
        helpers.debug('[preprocessing.py] resize()')
        height, width = image.shape[:2]

        # Longest side -> 26 px. A side is never allowed to collapse to 0,
        # because cv2.resize rejects an empty target size.
        ratio = float(26) / max(height, width)
        size_height = max(int(height * ratio), 1)
        size_width = max(int(width * ratio), 1)

        division_height = int(height / 2)
        division_width = int(width / 2)
        around_w = round(width * 20 / 100)
        around_h = round(height * 20 / 100)

        # Pixels of the middle column, 20% of the height above and below
        # the centre.
        middle_height = [
            image[row][division_width]
            for row in range(division_height - around_h,
                             division_height + around_h)
        ]

        # Pixels of the middle row, 20% of the width left and right of the
        # centre. The column index must follow the loop variable; sampling
        # the centre pixel repeatedly made this check redundant.
        middle_width = [
            image[division_height][col]
            for col in range(division_width - around_w,
                             division_width + around_w)
        ]

        helpers.debug('[preprocessing.py] segment() | \
            before line and sqrt processing')
        if size_height <= 15 and size_width >= 20 and \
            (any(i > 0.0000 for i in middle_height) or
                any(i > 0.0000 for i in middle_width)):

            # For horizontal line: clamp the height to 4..10 (both bounds;
            # the lower bound used to be overwritten by the upper one).
            nsize = min(max(size_height, 4), 10)
            print('hor ', size_height)
            new_size = (int(nsize), 26)

        else:
            if size_width / size_height >= 2:
                # For rectangle (sqrt)
                kernel = np.ones((2, 2), np.uint8)
                image = cv2.dilate(image, kernel, iterations=7)
                # xinit = int(width * 2 / 100) # validation
                # xend = int(width * 65 / 100) # validation
                xinit = int(width * 5 / 100)
                xend = int(width * 85 / 100)
                image = image[0:height, xinit:xend]

            # Use the clamped sizes so a very thin symbol never produces a
            # zero-sized target.
            new_size = (size_height, size_width)

        helpers.debug('[preprocessing.py] segment() | \
            after line and sqrt processing')

        helpers.debug('[preprocessing.py] segment() | \
            before resize')

        if self.configs['resize'] == 'smaller':
            image = cv2.resize(image.copy(), (new_size[1], new_size[0]),
                               interpolation=cv2.INTER_AREA)
        elif self.configs['resize'] == 'bigger':
            image = cv2.resize(image.copy(), (new_size[1], new_size[0]),
                               interpolation=cv2.INTER_LINEAR)
        helpers.debug('[preprocessing.py] segment() | after resize')

        # Add a border around the symbol and normalize it to 28x28 px
        helpers.debug('[preprocessing.py] segment() | before border')
        delta_w = 28 - new_size[1]
        delta_h = 28 - new_size[0]
        top, bottom = delta_h // 2, delta_h - (delta_h // 2)
        left, right = delta_w // 2, delta_w - (delta_w // 2)
        color = [0, 0, 0]
        image = cv2.copyMakeBorder(image.copy(),
                                   top,
                                   bottom,
                                   left,
                                   right,
                                   cv2.BORDER_CONSTANT,
                                   value=color)

        helpers.debug('[preprocessing.py] segment() | after border')
        return image
Example #30
0
    def check_correct_lex(self):
        """Run the lexer on ``self.latex_string`` and try to fix its errors.

        Three situations are handled, in this order:

        * no error recorded yet, first pass, fewer than 3 attempts and a
          non-empty string: lex the string and, if errors are found, record
          them, locate them and attempt a fix;
        * errors recorded, not the first pass, fewer than 3 attempts and a
          non-empty string: lex again; new errors are recorded (a leading
          entry whose second field is -1 is dropped first), located and a
          fix is attempted;
        * errors recorded with 3 or more attempts, or an empty string:
          ``LexicalError`` is raised.

        Returns:
            dict: with the keys ``latex``, ``latex_list``, ``latex_string``,
            ``errors_history`` and ``pure_errors``.

        Raises:
            LexicalError: carrying the same fields plus ``error`` (the
                current error list).
        """

        helpers.debug("\n[check_grammar_lex.py] check_correct_lex() | \
            attempts: %s" % self.attempts)

        if (not self.lex_error_list and self.__first_error
                and self.attempts < 3 and self.latex_string):

            helpers.debug("\n[check_grammar_lex.py] check_correct_lex() | \
                There's no previous error. Searching the first one.")

            found = lex.LatexLexer(self.latex_string)

            if found:

                self.pure_lex_errors.extend(found)
                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    pure_lex_errors: {0}".format(self.pure_lex_errors))

                located, history = self.__locate_lex_error(found)
                self.lex_error_list, self.lex_errors_history = located, history

                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    lex_error_list: {0}".format(self.lex_error_list))
                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    lex_errors_history: {0}".format(self.lex_errors_history))

                self.__first_error = False

                self.__attempt_to_fix_error(located)

        elif (self.lex_error_list and not self.__first_error
                and self.attempts < 3 and self.latex_string):

            helpers.debug("\n[check_grammar_lex.py] check_correct_lex() | \
                There's previous error. Searching for new errors.")

            found = lex.LatexLexer(self.latex_string)

            if found:
                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    New errors found.")

                self.pure_lex_errors.extend(found)
                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    pure_lex_errors: {0}".format(self.pure_lex_errors))

                # TODO(review): double-check this rule.
                # If the new error is an EOF error, remove it from the list
                # and take the next one.
                if found[0][1] == -1:
                    del found[0]

                located, history = self.__locate_lex_error(found)
                self.lex_error_list, self.lex_errors_history = located, history

                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    lex_error_list: {0}".format(self.lex_error_list))
                helpers.debug("[check_grammar_lex.py] check_correct_lex() | \
                    lex_errors_history: {0}".format(self.lex_errors_history))

                self.__attempt_to_fix_error(located)

        elif ((self.lex_error_list and self.attempts >= 3)
                or not self.latex_string):
            failure = {
                'latex': self.latex,
                'latex_list': self.latex_list,
                'latex_string': self.latex_string,
                'error': self.lex_error_list,  # Current error
                'errors_history': self.lex_errors_history,
                'pure_errors': self.pure_lex_errors
            }
            raise LexicalError(failure)

        outcome = {
            'latex': self.latex,
            'latex_list': self.latex_list,
            'latex_string': self.latex_string,
            'errors_history': self.lex_errors_history,
            'pure_errors': self.pure_lex_errors
        }
        return outcome