Beispiel #1
0
    def viterbi_post_process(self, img_arr, results):
        '''Go through all results and attempts to correct invalid syllables'''
        final = [[] for i in range(len(results))]
        for i, line in enumerate(results):
            syllable = []
            for j, char in enumerate(line):
                if char[-1] in u'་། ' or not word_parts_set.intersection(char[-1]) or j == len(line)-1:
                    if syllable:
                        syl_str = ''.join(s[-1] for s in syllable)
                        
                        if is_non_std(syl_str) and syl_str not in syllables:
                            print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                            bx = combine_many_boxes([ch[0:4] for ch in syllable])
                            bx = list(bx)

                            arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                            arr = fadd_padding(arr, 3)

                            try:
                                temp_dir = tempfile.mkdtemp()
                                tmpimg = os.path.join(temp_dir, 'tmp.tif')
                                Image.fromarray(arr*255).convert('L').save(tmpimg)
                                pgrec = PageRecognizer(tmpimg, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocessing=True, clear_hr=False, detect_o=False))
                                prob, hmm_res = pgrec.recognize_page()
                                os.remove(tmpimg)
                                os.removedirs(temp_dir)
                            except TypeError:
                                print 'HMM run exited with an error.'
                                prob = 0
                                hmm_res = ''
                            
                            logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                            if prob == 0 and hmm_res == '':
                                print 'hit problem. using unmodified output'
                                for s in syllable:
                                    final[i].append(s)
                            else:
                                bx.append(prob)
                                bx.append(hmm_res)
                                final[i].append(bx)
                        else:
                            for s in syllable:
                                final[i].append(s)
                    final[i].append(char)
                    syllable = []
                else:
                    syllable.append(char)
            if syllable:
                for s in syllable:
                    final[i].append(s)
    
        return final
Beispiel #2
0
def viterbi_post_process(img_arr, results):
    '''Go through all results and attempts to correct invalid syllables'''
    final = [[] for i in range(len(results))]
    for i, line in enumerate(results):
        syllable = []
        for j, char in enumerate(line):
            if char[-1] in u'་། ' or not word_parts.intersection(char[-1]) or j == len(line)-1:
                if syllable:
                    syl_str = ''.join(s[-1] for s in syllable)
                    
                    if is_non_std(syl_str) and syl_str not in syllables:
                        print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                        bx = combine_many_boxes([ch[0:4] for ch in syllable])
                        bx = list(bx)
                        arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                        arr = fadd_padding(arr, 3)
                        try:
                            
                            prob, hmm_res = main(arr, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocess=True, clear_hr=False), page_info={'flname':''})
                        except TypeError:
                            print 'HMM run exited with an error.'
                            prob = 0
                            hmm_res = ''
                        
#                         corrections[syl_str].append(hmm_res) 
                        logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                        if prob == 0 and hmm_res == '':
                            print 'hit problem. using unmodified output'
                            for s in syllable:
                                final[i].append(s)
                        else:
                            bx.append(prob)
                            bx.append(hmm_res)
                            final[i].append(bx)
                    else:
                        for s in syllable:
                            final[i].append(s)
                final[i].append(char)
                syllable = []
            else:
                syllable.append(char)
        if syllable:
            for s in syllable:
                final[i].append(s)

    return final
Beispiel #3
0
    def __init__(self, shapes, k):
        from sklearn.cluster import KMeans
        self.shapes = shapes
        self.k = k
        self.page_array = shapes.img_arr

        if shapes.conf['line_cluster_pos'] == 'top':
            tops = array(shapes.get_tops(), dtype=float64)
        elif shapes.conf['line_cluster_pos'] == 'center':
            tops = array(
                 [t[1] + .5*shapes.char_mean for t in shapes.get_boxes() if t[3] > 2* shapes.tsek_mean],
                    dtype=float64
                         )
        else:
            raise ValueError, "The line_cluster_pos argument must be either 'top' or 'center'"

        tops.shape = (len(tops), 1)
        
        kmeans = KMeans(n_clusters=k)
#         print tops
        kmeans.fit(tops)
        
        
        ################## 
        ######## mark cluster centroids on original image and show them
#        img_arr = shapes.img_arr.copy()
#        for centroid in kmeans.cluster_centers_:
##            print centroid[0]
#            img_arr[centroid[0],:] = 0
#            
#        import Image
#        Image.fromarray(img_arr*255).show()
        #######################3
        
        lines = [[] for i in range(k)]
        
        ind = shapes.get_indices()
        
        ### Assign char pointers (ind) to the appropriate line ###
#        [lines[kmeans.labels_[i]].append(ind[i]) for i in range(len(ind))]
        [lines[kmeans.predict(shapes.get_boxes()[ind[i]][1])[0]].append(ind[i]) for i in range(len(ind))]
        lines = [l for l in lines if l]
        self.k = len(lines)
        boxes = shapes.get_boxes()
        
        
        ### Sort indices so they are in order from top to bottom using y from the first box in each line
        
        sort_inx = list(argsort([boxes[line[0]][1] for line in lines]))
        lines.sort(key=lambda line: boxes[line[0]][1])
        
        ### Get breaklines for splitting up lines
        ### Uses the topmost box in each line cluster to determine breakline
        
        try:
            topmosts = [min([boxes[i][1] for i in line]) for line in lines]
        except ValueError:
            print 'failed to get topmosts...'
            raise
        
        vsums = self.page_array.sum(axis=1)
        breaklines = []
        delta = 25
        for c in topmosts:
            if c - delta < 0:
                lower = 0
            else:
                lower = c-delta
            e = argmax(vsums[lower:c+delta])
            c = c - delta + e
            if c < 0:
                c = 0
            breaklines.append(c)
    
        breaklines.append(self.page_array.shape[0])
        self.baselines = []
        
        for i, br in enumerate(breaklines[:-1]):
            
            try:
                baseline_area = vsums[br:breaklines[i+1]]
                if baseline_area.any():
                    self.baselines.append(br + argmin(baseline_area))
                else:
                    print i
                    print 'No baseline info'
            except ValueError:
                print 'ValueError. exiting...HERE'
                import traceback;traceback.print_exc()
                
                raise

        final_ind = dict((i, []) for i in range(len(lines)))
        self.new_contours = {}
        for j, br in enumerate(breaklines[1:-1]):
            topcount = 0
            bottomcount = 0
            for i in lines[j]:
                # if char extends into next line, break it
                # 253 is roughly global line height avg + 1 std
                # The following lines says that a box/char must be extending over 
                # breakline by a non trivial amount eg. 30 px and must itself
                # be a tall-ish box (roughly the height of average line) in order
                # for it to be broken. 
    #            if (bounding[i][1] + bounding[i][3]) - br >= 30 and bounding[i][3] > 205:
                if (boxes[i][1] + boxes[i][3]) - br >= 30 and \
                    (boxes[i][1] + boxes[i][3]) - topmosts[j] > self.shapes.char_mean*2.85:
                    chars = ones((boxes[i][3]+2, boxes[i][2]+2), dtype=uint8)
                    contours = shapes.contours
                    cv.drawContours(chars, [contours[i]], -1,0, \
                        thickness = -1, offset=(-boxes[i][0]+1,-boxes[i][1]+1))
                    cv.dilate(chars, None, chars)
                    y_offset = boxes[i][1]
                    new_br = br - y_offset
                    prd_cut = []

                    ### Iterate through potential cut-points and 
                    ### and cut where top half has the highest probability
                    ### that is not a tsek
#                     print 'bottom bound cut point', int(.75*shapes.tsek_mean)
                    for delta in range(-3, int(.75*shapes.tsek_mean), 1):
#                     for delta in range(-3, 100, 1):
                        cut_point = new_br + delta
#                        chars[cut_point, :] = 0
#                        import Image
#                        Image.fromarray(chars*255).show()
                        tchr = chars[:cut_point,:]
                        tchr = ftrim(tchr)
                        if not tchr.any():
                            continue
                        tchr = normalize_and_extract_features(tchr)
                        probs = cls.predict_proba(tchr)
                        max_prob_ind = argmax(probs)
                        chr = label_chars[max_prob_ind]
                        prd_cut.append((probs[0,max_prob_ind], chr, cut_point))
                    
                    prd_cut = [q for q in prd_cut if q[1] != u'་']
                    try:
                        cut_point = max(prd_cut)[-1]
                    except:
                        print 'No max prob for vertical char break, using default breakline. Usually this means the top half of the attempted segmentation looks like a tsek blob'
                        cut_point = br-boxes[i][1]

                    #######FOLLWNG NOT WORKING ATTEMPTS TO GET A BETTER BREAK LINE
    #                br2 = br-bounding[i][1]
    #                
    #                csum = chars.sum(axis=1)
    #                bzone = csum[br2-25:br2+40]
    #                if bzone.any():
    #                    br2 = np.argmax(bzone) + (br-25)
    ##                    print br, 'br'
    #                chars = chars*255
    #                nbr = br
    #                cv.line(chars, (0, br2), (chars.shape[1], br2), 0)
    #                Image.fromarray(chars).save('/tmp/outt.tiff')
    #                sys.exit()
                    #############
                    
                    tarr = chars[:cut_point,:]
                    tarr, top_offset = ftrim(tarr, new_offset=True)
                    tarr = fadd_padding(tarr, 3)
                    barr = chars[cut_point:,:]
                    barr = ftrim(barr, sides='brt') 
                    barr = fadd_padding(barr, 3)
                    
                    c1, h = cv.findContours(image=tarr, mode=cv.RETR_LIST, method=cv.CHAIN_APPROX_SIMPLE, offset=(boxes[i][0]+top_offset['left'],boxes[i][1]))

                    c1 = c1[argmax([len(t) for t in c1])] # use the most complex contour

                    bnc1 = cv.boundingRect(c1)

                    c2, h = cv.findContours(barr, mode=cv.RETR_LIST, 
                                            method=cv.CHAIN_APPROX_SIMPLE,
                                            offset=(boxes[i][0]-3,boxes[i][1]+cut_point-3))

                    c2 = c2[argmax([len(t) for t in c2])]
                    bnc2 = cv.boundingRect(c2)

                    topbox_name = 't%d_%d' % (j, topcount)
                    final_ind[j].append(topbox_name)
                    self.new_contours[topbox_name] = (bnc1, c1)
                    topcount += 1
                    
                    if bnc2[-1] > 8: #only add bottom contour if not trivially small
                        bottombox_name = 'b%d_%d' % (j, bottomcount)
                        final_ind[j+1].append(bottombox_name)
                        self.new_contours[bottombox_name] = (bnc2, c2)
                        bottomcount += 1
                    
                else:
                    final_ind[j].append(i)
            # Don't forget to include the last line
        map(final_ind[len(lines)-1].append, lines[len(lines)-1])
        
        self.lines_chars = final_ind
        
        cctops = [self.shapes.get_boxes()[i][1] for i in self.shapes.small_contour_indices]
        char_tops = zip(cctops, self.shapes.small_contour_indices)
        char_tops.sort(key=lambda x: x[0])
        sorted_indices = [i[1] for i in char_tops]
        _line_insert_indxs = []
        _line_insert_indxs.extend([bisect_right(char_tops, (i - 1,))
                                   for i in breaklines])
        self.small_cc_lines_chars = []
        if not _line_insert_indxs: sys.exit()

        for i, l in enumerate(_line_insert_indxs[:-1]):
            self.small_cc_lines_chars.append(sorted_indices[l:_line_insert_indxs[i+1]])
        
        self.small_cc_lines_chars.append(sorted_indices[_line_insert_indxs[-1]:])
        
        self.small_cc_lines_chars = [self.small_cc_lines_chars[i] for i in range(len(self.lines_chars)) if self.lines_chars[i]]
       
        cctops = [self.shapes.get_boxes()[i][1] for i in self.shapes.emph_symbols]
        char_tops = zip(cctops, self.shapes.emph_symbols)
        char_tops.sort(key=lambda x: x[0])

        empred = [kmeans.predict(shapes.get_boxes()[i][1])[0] for i in self.shapes.emph_symbols]
        
        self.emph_lines = [[] for i in range(k)]
        for nn, e in enumerate(empred):
            self.emph_lines[sort_inx.index(e)].append(self.shapes.emph_symbols[nn])
        
    
        if self.shapes.detect_o:
            cctops = [self.shapes.get_boxes()[i][1] for i in self.shapes.naros]
            char_tops = zip(cctops, self.shapes.naros)
            char_tops.sort(key=lambda x: x[0])
            sorted_indices = [i[1] for i in char_tops]
            _line_insert_indxs = []
            _line_insert_indxs.extend([bisect_right(char_tops, (i - 1,))
                                       for i in breaklines])
            
            if not _line_insert_indxs: sys.exit()
            
            self.line_naros = []
            for i, l in enumerate(_line_insert_indxs[:-1]):
    #   
                self.line_naros.append(sorted_indices[l:_line_insert_indxs[i+1]])
            
            self.line_naros.append(sorted_indices[_line_insert_indxs[-1]:])
            
            self.line_naros  = [self.line_naros[i] for i in range(len(self.lines_chars)) if self.lines_chars[i]]
            self.line_naro_spans = []
            for ll, mm in enumerate(self.line_naros):
                thisline = []
                for nn, naro in enumerate(mm):
                    box = self.get_box(naro)
                    thisline.append(box)
                thisline.sort(key=lambda x: x[0])
                self.line_naros[ll].sort(key=lambda x: self.get_box(x)[0])
                self.line_naro_spans.append(thisline)
    
        if self.shapes.low_ink:
            
            cctops = [lib[1] for lib in self.shapes.low_ink_boxes]
            char_tops = zip(cctops, self.shapes.low_ink_boxes)
            char_tops.sort(key=lambda x: x[0])
            sorted_indices = [i[1] for i in char_tops]
            _line_insert_indxs = []
            _line_insert_indxs.extend([bisect_right(char_tops, (i - 1,))
                                       for i in breaklines])
            
            self.low_ink_boxes = []
            if not _line_insert_indxs: sys.exit()
    
            for i, l in enumerate(_line_insert_indxs[:-1]):
                self.low_ink_boxes.append(sorted_indices[l:_line_insert_indxs[i+1]])
            
            self.low_ink_boxes.append(sorted_indices[_line_insert_indxs[-1]:])
            
            self.low_ink_boxes = [self.low_ink_boxes[i] for i in range(len(self.lines_chars)) if self.lines_chars[i]]
Beispiel #4
0
    def construct_vector_set_experimental(self):
        '''Build per-line feature vectors and boxes, splitting wide boxes.

        Boxes are first combined with ``CombineBoxesForPage``. Every
        combined box is drawn, rescaled by its line's scale factor and then
        either

        * turned directly into one feature vector, or
        * when its scaled width is at least
          ``char_mean + breakwidth * char_std``, tentatively cut into
          2..N pieces at several candidate positions. Each candidate
          segmentation is scored with the classifier's log-probabilities
          and ``viterbi_cython``; the most probable one is kept.

        Results are stored on the instance:
            self.final_box_info: the ``CombineBoxesForPage`` result.
            self.vectors: per line, a feature vector for unsplit boxes or
                the predicted character for pieces of a split box.
            self.new_boxes: per line, ``[x, y, w, h]`` for unsplit boxes or
                ``[x, y, w, h, prob, char]`` for pieces of a split box.

        When ``shapes.detect_o`` is set, naro marks that overlap a box
        horizontally by more than 80% are merged into that box and the
        naro vowel sign is appended to its character.

        Returns None. Prints 'no vectors' and stops early when no line
        produced any vector.
        '''
        final_box_info = CombineBoxesForPage(self.line_info)

        self.final_box_info = final_box_info
        final_boxes = final_box_info.final_boxes

        final_indices = final_box_info.final_indices
        scales = final_box_info.transitions

        self.vectors = [[] for i in range(self.line_info.k)]
        self.new_boxes = [[] for i in range(self.line_info.k)]
        BREAKWIDTH = self.breakwidth
        rbfcls = self.line_info.rbfcls
        for l in range(len(final_indices)):  # for each line
            try:
                scale_l = scales[l]
            except:
                print 'ERROR AT ', l, len(scales)
                raise
            char_mean_int = floor(final_box_info.char_mean)

            try:
                lb = range(len(final_indices[l]))
            except IndexError:
                print 'index error'
                continue

            for i in lb:  # for each line box

                ## New draw, takes into account tree hierarchy of contours
                x, y, w, h = final_boxes[l][i]
                letter = ones((h, w), dtype=uint8)
                for k in final_indices[l][i]:
                    # String keys are drawn through line_info.get_contour;
                    # everything else goes through the shapes object.
                    if not isinstance(k, str):
                        letter = self.line_info.shapes.draw_contour_and_children(
                            k, char_arr=letter, offset=(-x, -y))
                    else:
                        cv.drawContours(letter,
                                        [self.line_info.get_contour(k)],
                                        -1,
                                        0,
                                        thickness=-1,
                                        offset=(-x, -y))

                letter = cv.resize(letter,
                                   dsize=(0, 0),
                                   fx=scale_l,
                                   fy=scale_l)
                if letter.shape[1] >= (final_box_info.char_mean +
                                       BREAKWIDTH * final_box_info.char_std
                                       ):  # if a box is too large, break it
                    sw = w * scale_l
                    vsum = letter.sum(axis=0)
                    chars = sw // (final_box_info.char_mean -
                                   1.5 * final_box_info.char_std
                                   )  # important, floor division

                    if 10.0 > chars > 1.0:  # Assume chars-to-be-broken don't span > 10
                        best_box_dim = []
                        best_prob = 0.0
                        best_seq = None
                        ## Try every character count from the estimate down
                        ## to 2. This allows potential breaks for chars-1,
                        ## chars-2, ...
                        for n_chars in range(int(chars), 1, -1):

                            # z widens the window searched around each
                            # nominal cut position.
                            for z in range(0, 21, 2):
                                segs = []
                                prev_breakline = 0
                                for pos in range(n_chars - 1):
                                    if char_mean_int - z >= 0:

                                        upper_range = [
                                            int(np.round((pos + 1) *
                                                         (char_mean_int - z))),
                                            int(np.round((pos + 1) *
                                                         (char_mean_int + z)))
                                        ]
                                        vsum_range = vsum[
                                            upper_range[0]:upper_range[1]]

                                        # Cut at the column with the largest
                                        # sum inside the window, if any.
                                        if vsum_range.any():
                                            breakline = int(
                                                np.round((pos + 1) *
                                                         (char_mean_int - z) +
                                                         argmax(vsum_range)))
                                        else:
                                            breakline = None

                                        if breakline:
                                            sg = letter[:, prev_breakline:
                                                        breakline]

                                            prev_breakline = breakline
                                        else:
                                            # Fall back to a fixed-width cut.
                                            sg = letter[:,
                                                        int(np.round(
                                                            pos *
                                                            (char_mean_int - z))
                                                        ):int(np.round(
                                                            (pos + 1) *
                                                            (char_mean_int - z)))]
                                            prev_breakline = int(
                                                np.round((pos + 1) *
                                                         (char_mean_int - z)))

                                        segs.append(sg)

                                # Everything right of the last cut.
                                segs.append(
                                    letter[:,
                                           int(np.round((n_chars - 1) *
                                                        (char_mean_int - z))):])

                                segs = [fadd_padding(sg, 2) for sg in segs]
                                seg_ctrs = [
                                    cv.findContours(
                                        sg.copy(),
                                        mode=cv.RETR_CCOMP,
                                        method=cv.CHAIN_APPROX_SIMPLE)
                                    for sg in segs
                                ]
                                try:
                                    seg_bxs = [[
                                        cv.boundingRect(k) for k in sgc[0]
                                    ] for sgc in seg_ctrs]
                                except:
                                    print sgc
                                    raise

                                bxs = []
                                nsegs = []

                                prev_w = 0
                                for zi, ltb in enumerate(seg_bxs):
                                    seg = segs[zi]
                                    # Blank out blobs that are small in
                                    # either dimension (tsek-sized debris
                                    # left over from the cut).
                                    for b in ltb:
                                        if b[2] < (
                                                final_box_info.tsek_mean +
                                                4 * final_box_info.tsek_std
                                        ) or b[3] < final_box_info.tsek_mean + 4 * final_box_info.tsek_std:
                                            seg[b[1] - 1:b[1] + b[3] + 1,
                                                b[0] - 1:b[0] + b[2] +
                                                1] = True
                                    seg, ofst = ftrim(seg, new_offset=True)
                                    # Map the piece back to page coordinates.
                                    bx = [
                                        x + prev_w + (ofst['left'] / scale_l),
                                        y + (ofst['top'] / scale_l),
                                        seg.shape[1] / scale_l,
                                        seg.shape[0] / scale_l
                                    ]
                                    prev_w += seg.shape[1] / scale_l
                                    if 0 in seg.shape:
                                        # Nothing left after trimming. No
                                        # feature vector is produced for it,
                                        # so no box may be kept either or
                                        # boxes and predictions would shift
                                        # against each other.
                                        continue
                                    bxs.append(bx)
                                    nsegs.append(seg)

                                xt = [
                                    normalize_and_extract_features(sg)
                                    for sg in nsegs
                                ]
                                prd_probs = cls.predict_log_proba(xt)
                                prd_probs = prd_probs.astype(np.float32)

                                prob, prds = viterbi_cython(
                                    prd_probs.shape[0], n_states, start_p,
                                    trans_p, prd_probs)
                                prob = np.exp(prob)

                                if prob > best_prob:
                                    best_prob = prob
                                    best_seq = prds
                                    best_box_dim = bxs

                        # No candidate scored above zero: use the last one.
                        if not best_box_dim:
                            best_prob = prob
                            best_seq = prds
                            best_box_dim = bxs

                        for u in range(len(best_seq)):
                            self.vectors[l].append(label_chars[best_seq[u]])
                            best_box = best_box_dim[u]
                            best_box = [int(np.round(ii)) for ii in best_box]
                            best_box.append(best_prob)
                            best_box.append(label_chars[best_seq[u]])
                            self.new_boxes[l].append(best_box)

                            # Best effort: mark the right edge of the piece in
                            # the page image; a cut outside the image (or a
                            # non-writable array) is simply skipped.
                            try:
                                self.line_info.shapes.img_arr[
                                    best_box[1]:best_box[1] + best_box[3],
                                    best_box[0] + best_box[2]] = 1
                            except (IndexError, ValueError):
                                pass

                    else:
                        self.new_boxes[l].append([x, y, w, h])
                        vect = normalize_and_extract_features(letter)
                        self.vectors[l].append(vect)

                else:
                    self.new_boxes[l].append([x, y, w, h])
                    vect = normalize_and_extract_features(letter)
                    self.vectors[l].append(vect)

        if not any(self.vectors):
            print 'no vectors'
            return

        if self.line_info.shapes.detect_o:

            for i, l in enumerate(self.new_boxes):
                for n in self.line_info.line_naros[i]:
                    box = self.line_info.get_box(n)
                    x, y, w, h = box
                    r0 = x + w
                    for k, b in enumerate(l):
                        # Horizontal overlap of naro and box, relative to the
                        # narrower of the two.
                        if ((b[2] + w) - abs(b[0] - x) - abs(
                            (b[0] + b[2]) - r0)) / (
                                2 * float(min(w, b[2]))) > .8:
                            nbox = list(combine_many_boxes([box, b]))
                            if isinstance(self.vectors[i][k], unicode):
                                # Already a character (piece of a split box):
                                # keep its box, only append the vowel sign.
                                self.vectors[i][k] += u'ོ'
                                nbox = b
                                nbox[-1] = self.vectors[i][k]
                            else:
                                probs = cls.predict_log_proba(
                                    self.vectors[i][k])
                                mx = np.argmax(probs)
                                prob = probs[0][mx]
                                mx = rbfcls.predict(self.vectors[i][k])[0]
                                ch = label_chars[mx] + u'ོ'
                                self.vectors[i][k] = ch
                                nbox.append(prob)
                                nbox.append(ch)
                            self.new_boxes[i][k] = nbox
Beispiel #5
0
    def _sample_widths_method(self,
                              chars,
                              letter,
                              letter_box,
                              oo_scale_l,
                              line_num=None):
        '''Segment ``letter`` into ``chars`` characters by sampling widths.

        For ``chars > 1`` the method draws 15 random sets of character
        widths from a Gaussian around the (slightly reduced) mean character
        width, cuts ``letter`` accordingly, classifies the pieces and scores
        each segmentation with ``viterbi_cython`` plus the log-probability
        of the sampled widths. The best-scoring segmentation wins. For
        ``chars <= 1`` the letter is classified as a single character.

        Parameters:
            chars: number of characters the letter is assumed to contain.
            letter: 2-d image array of the (scaled) letter.
            letter_box: ``(x, y, w, h)`` of the letter.
            oo_scale_l: factor applied to piece offsets and sizes when they
                are converted into output boxes.
            line_num: optional line number; when given, each piece is
                checked for an overlapping naro mark via
                ``line_info.check_naro_overlap`` and merged with it.

        Returns:
            ``(final_prob, res)`` where ``final_prob`` is the best log score
            and ``res`` is a list of ``[x, y, w, h, exp(final_prob), char]``
            boxes. If no sampled segmentation produced any feature vector
            the result is ``(-inf, [])``.
        '''
        x, y, w, h = letter_box

        ################default
        cur_mean = self.final_box_info.char_mean * .97
        cur_std = .295 * self.final_box_info.char_std
        #################
        best_prob = -np.inf
        # Defined up front so the result loop below is safe even when every
        # sampled segmentation is skipped.
        best_prd = []
        best_boxes = []

        if chars > 1:
            letter = cv.dilate(letter.copy(), None, iterations=1)

            padding_amount = 3

            for n in range(15):

                widths = [gauss(cur_mean, cur_std) for i in range(chars)]
                prev = 0
                vecs = []
                wdthprobs = 0
                boxes = []
                for i, val in enumerate(widths):
                    # The last piece always runs to the right edge.
                    if i == chars - 1:
                        end = letter.shape[1]
                    else:
                        end = prev + val
                    wdthprobs += gausslogprob(cur_mean, cur_std, end - prev)

                    s = fadd_padding(letter[:, int(prev):int(end)],
                                     padding_amount)
                    _, ctrs, hier = cv.findContours(
                        s.copy(),
                        mode=cv.RETR_TREE,
                        method=cv.CHAIN_APPROX_NONE)
                    bounding = map(boundingRect, ctrs)
                    # Blank out small blobs (under 23 px in either dimension)
                    # whose parent in the hierarchy is contour 0.
                    for k, b in enumerate(bounding):
                        if (b[2] < 23 or b[3] < 23) and hier[0][k][3] == 0:
                            s[b[1] - 1:b[1] + b[3] + 1,
                              b[0] - 1:b[0] + b[2] + 1] = 1
                    s = s[padding_amount:-padding_amount,
                          padding_amount:-padding_amount]
                    s, ofst = ftrim(s, new_offset=True)

                    if 0 not in s.shape:
                        nnbox = [
                            x + (prev + ofst['left']) * oo_scale_l,
                            y + (ofst['top'] * oo_scale_l),
                            s.shape[1] * oo_scale_l, s.shape[0] * oo_scale_l
                        ]
                        if line_num is not None:
                            naro = self.line_info.check_naro_overlap(
                                line_num, nnbox)
                            # NOTE(review): ``0 != False`` is False in Python,
                            # so a naro identified by index 0 would be
                            # ignored here -- confirm what
                            # check_naro_overlap returns.
                            if naro != False:

                                naro_box = self.line_info.get_box(naro)
                                nnbox = combine_many_boxes([nnbox, naro_box])
                                ss = cv.resize(s,
                                               dsize=(0, 0),
                                               fx=oo_scale_l,
                                               fy=oo_scale_l)
                                # Pad on top and on the right up to the size
                                # of the combined box, then draw the naro in.
                                ss = np.vstack((ones(
                                    (nnbox[3] - ss.shape[0], ss.shape[1]),
                                    dtype=ss.dtype), ss))
                                ss = hstack(
                                    (ss,
                                     ones(
                                         (ss.shape[0], nnbox[2] - ss.shape[1]),
                                         dtype=ss.dtype)))

                                cv.drawContours(
                                    ss, [self.line_info.get_contour(naro)],
                                    -1,
                                    0,
                                    thickness=-1,
                                    offset=(-naro_box[0], -naro_box[1]))
                                s = ss
                        vecs.append(normalize_and_extract_features(s))
                        boxes.append(nnbox)
                    else:
                        # An empty piece ends this sample; the pieces found
                        # so far are still scored.
                        break
                    prev += val
                if not vecs: continue
                xn = len(vecs)

                # NOTE(review): 346 is the hard-coded feature-vector length.
                vecs = np.array(vecs).reshape(xn, 346)  # 346 is len(vecs[0])

                probs = predict_log_proba(vecs)
                probs = probs.astype(np.float32)

                # Only true for n == 10 within range(15): shrink the mean
                # width for the remaining samples.
                if n % 10 == 0 and n != 0:

                    cur_mean = self.final_box_info.char_mean * (
                        .97 - (3 * n / 1000.0))

                prob, prds = viterbi_cython(xn, n_states, start_p, trans_p,
                                            probs)
                prob = prob + wdthprobs
                if prob > best_prob:
                    best_prob = prob
                    best_prd = prds
                    best_boxes = boxes
        else:
            best_boxes = [letter_box]
            probs = predict_log_proba(normalize_and_extract_features(letter))
            amx = probs[0].argmax()
            try:
                startprob = start_p[amx]
            except IndexError:
                startprob = 1e-10
            best_prob = probs[0][amx] + gausslogprob(
                cur_mean, cur_std, letter_box[2] / oo_scale_l) + startprob
            best_prd = [amx]

        final_prob = best_prob
        res = []
        for i, val in enumerate(best_prd):
            # Boxes become [x, y, w, h, probability, character].
            best_boxes[i] = [int(np.round(k)) for k in best_boxes[i]]
            best_boxes[i].extend([float(np.exp(final_prob)), label_chars[val]])
            res.append(best_boxes[i])

        return (final_prob, res)