예제 #1
0
def check_baseline(a):
    # 检查每个单词的baseline,如果baseline不同,则分离出去
    # 字符的高度有两种,一等高度和二等高度
    log.info("enter")
    ans = []
    for i in a:
        baseline = []
        for ch in i:
            h = ch['max_x'] - ch['min_x']
            if abs(h - 15) < 3:  # 只统计占一格的字符
                baseline.append(ch['max_x'])
        if len(baseline) < 3:
            ans.append(i)
            continue
        baseline_value = find_frequent(baseline)
        new_word = []
        for ch in i:
            h = ch['max_x'] - ch['min_x']
            if abs(h - 15) < 3:  # 如果占一格
                if abs(ch['max_x'] - baseline_value) > 4:
                    ans.append([ch])  # 占一格且远离基线,必然不属于当前word
                else:
                    new_word.append(ch)
            else:
                new_word.append(ch)
        ans.append(new_word)
    log.info("end")
    return ans
예제 #2
0
def go(img):
    log.info("enter")
    img, recs = img2recs(img=img)
    if len(recs) == 0: return
    recs = eye.predict(recs)
    if len(recs) == 0: return
    recs = sort_recs(recs)
    ans = "".join(map(lambda i: i['ans'], recs))
    print(imgId, ans)
    log.info("end")
    return ans
예제 #3
0
def grab_img():
    # 获取屏幕截图
    global imgId
    log.info('enter')
    # 图片越大截图花费时间越多,所以截图应该尽量小
    # img = ImageGrab.grab((250, 161, 1141, 610))
    filename = "grab/%d.jpg" % imgId
    grab(filename, 62, 160, 857, 575)
    imgId += 1
    img = io.imread(filename)
    log.info("end")
    return img
예제 #4
0
def sort_words(words):
    # 对单词进行排序,防止单词覆盖
    # 优先拼写较长的单词,因为较长的单词表明该单词没有被覆盖
    log.info("enter")
    v = [0] * len(words)
    for ind, i in enumerate(words):
        w = ''.join(map(lambda x: x['ans'], i))
        print(w)
        if w in english_dic:
            v[ind] = len(w)
    v = np.argsort(v)
    a = []
    for i in range(len(v) - 1, -1, -1):
        a.append(words[v[i]])
    log.info("end")
    return a
예제 #5
0
def build_words(recs):
    # 根据小矩形构建单词
    log.info("enter")
    char_gap_width = 9

    recs = sorted(recs, key=lambda x: x['min_y'])
    a = []
    for i in recs:
        had = False
        for ind, word in enumerate(a):
            if word[-1]['max_y'] + char_gap_width > i['min_y'] and intersect(
                    word[-1], i):
                word.append(i)
                had = True
        if not had:
            a.append([i])
    log.info("end")
    return a
예제 #6
0
def find_frequent(a):
    """
    给定一个数组,求数组中的众数
    方法是对元素进行KMEANS聚类,聚成3类,取元素最多的那一类
    这种方法可以寻找近似众数
    :param a:
    :return:
    """
    log.info("enter")
    a = np.array(a)
    km = KMeans(n_clusters=min(len(a), 3))
    label = km.fit_predict(np.reshape(a, (-1, 1)))
    cnt = collections.Counter(label)
    ma = None
    for i in cnt:
        if ma is None or cnt[ma] < cnt[i]:
            ma = i
    baseline = np.mean(a[label == ma])
    log.info("end")
    return baseline