コード例 #1
0
ファイル: extractor.py プロジェクト: after12am/summary
 def content(self):
     """Return the body text of the highest-scoring cluster.

     The normalized HTML body is decomposed into sections, the sections are
     clustered, and the cluster with the largest ``points`` value is picked.

     Returns:
         The entity-decoded body of the best cluster, or ``False`` when no
         cluster was produced or the best cluster has an empty body.
     """
     sects = parser.decompose(extract_normed_body(self.html))
     clusts = cluster.lbcluster(sects)
     # Nothing to rank: report "no content" with the same sentinel used
     # below instead of failing on clusts[0].
     if not clusts:
         return False
     # Highest score first. key=/reverse= is stable like the former cmp-based
     # sort (ties keep their original order) and also works on Python 3,
     # where the ``cmp`` argument and builtin no longer exist.
     clusts.sort(key=lambda c: c.points, reverse=True)
     best = clusts[0]
     if len(best.body) > 0:
         return decode_entities(best.body)
     return False
コード例 #2
0
ファイル: extractor.py プロジェクト: after12am/summary
 def title(self):
     """Pick the most title-like block located before the main content.

     The sections preceding the first block of the highest-scoring cluster
     are walked backwards (nearest to the content first). Each non-empty
     block is scored with ``lbttlscore`` using a factor that is multiplied by
     ``self.decay_factor`` per scored block; once a candidate has been found,
     every further block visited is additionally damped by
     ``self.continuous_factor``.

     Returns:
         The text of the best-scoring block, an empty unicode string when no
         block scored above zero, or ``False`` when there is no cluster or
         the best cluster has no blocks.
     """
     sects = parser.decompose(extract_normed_body(self.html))
     clusts = cluster.lbcluster(sects)
     # No clusters at all: same "not found" sentinel as the empty-blocks case.
     if not clusts:
         return False
     # Highest score first; stable and Python 3 compatible (no ``cmp``).
     clusts.sort(key=lambda c: c.points, reverse=True)
     best = clusts[0]
     if len(best.blocks) == 0:
         return False

     factor = 1.0
     continuous = 1.0
     best_text = u''
     best_score = 0
     # Everything that appears before the first block of the best cluster.
     preceding = sects[:sects.index(best.blocks[0])]
     for b in reversed(preceding):
         # The damping starts as soon as a candidate exists and is applied
         # even for empty blocks, which are skipped right after.
         if len(best_text) > 0:
             continuous /= self.continuous_factor
         if len(b.text) == 0:
             continue
         factor *= self.decay_factor
         # Score once and reuse it for both the comparison and the update.
         score = lbttlscore(b, factor) * continuous
         if score > best_score:
             best_text = b.text
             best_score = score
     return best_text
コード例 #3
0
ファイル: main.py プロジェクト: dxawgzy/mathAI
        'candidates': candidates
    })
# Debug (disabled): print([x['location'] for x in characters])

# Order the recognized characters before grouping them into tokens.
sorted_characters = sort_characters(characters)
# Debug (disabled):
# print('character list before sorting')
# print(characters)
# print('character sequence after sorting')
# print([[x['location'],x['candidates']] for x in sorted_characters])
tokens = process.group_into_tokens(sorted_characters)
print('识别出的token')
print(tokens)
# First initialize every token as a tree node, giving the node list node_list.
# NOTE(review): the call receives sorted_characters, not tokens - confirm.
node_list = parser.characters_to_nodes(sorted_characters)

# Build the parse tree from the node list and dump it.
parser_tree = parser.decompose(node_list)
print(parser_tree)
# The value returned by post_order is printed as the LaTeX string.
latex_str = post_order(parser_tree)
print(latex_str)
print(parser_tree['value'])
# Earlier version of the pipeline, kept disabled:
# parser_tree = parser.parser(sorted_characters)
# for i in range(10):
#     print()
# print('recognized expression:')
# latex_str = tools.print_parser_tree(parser_tree,"")
# print()
# value = calculate(parser_tree)
# print('computed result:',value)
#
# print('converted LaTeX statement:')
#
コード例 #4
0
def _draw_latex_line(label, latex_str, y_pos):
    """Draw one ``$label:latex_str$`` line at height ``y_pos`` with pyplot."""
    # Strings longer than 12 characters get a slightly smaller font.
    font_size = 15 if len(latex_str) > 12 else 18
    plt.text(0.1, y_pos, r'$' + label + ':' + latex_str + '$',
             fontsize=font_size)


def _describe_result(parser_tree, latex_str):
    """Return ``(solution, answer)`` for the parsed expression.

    ``solution`` is the ``$...$`` string drawn on the figure (it may be
    empty); ``answer`` is the value handed back to callers in test mode.
    """
    solution = ''
    answer = ''
    status = parser_tree['status']
    if status == STATUS['solved']:
        value = parser_tree['value']
        # Plain numbers are printed as-is, anything else goes through latex().
        if isinstance(value, (int, float)):
            answer = str(value)
        else:
            answer = str(latex(value))
        solution = r'$result:' + answer + '$'
    elif parser_tree['type'] in (NODE_TYPE['derivation'],
                                 NODE_TYPE['limitation']):
        answer = str(latex(parser_tree['value']))
        solution = r'$result:' + answer + '$'
    elif status in (STATUS['eq1'], STATUS['eq2']):
        result = solve_expression(parser_tree)
        solution = r'$result:' + result_to_str(result) + '$'
        # The raw result (not its string form) is returned in this branch.
        answer = result
    elif status == STATUS['other']:
        answer = latex(parser_tree['value'])
    else:
        # The return value is not used; the call is kept in case
        # solve_expression has side effects on the tree - TODO confirm.
        solve_expression(parser_tree)
        solution = r'$solution:' + latex_str + '$'
    return solution, answer


def solve(filename, mode='product'):
    """Recognize the expression in an image, solve it and render the result.

    The image is binarized and segmented into symbols, the symbols are
    classified with ``cnn_symbol_classifier``, the recognized characters are
    parsed into a tree, and the expression, its intermediate steps (at most
    five) and the result are drawn with pyplot and saved under
    ``SAVE_FOLDER``.

    Args:
        filename: Image passed to ``read_img_and_convert_to_binary``.
        mode: ``'product'`` to get the name of the saved figure, ``'test'``
            to get the recognized expression and the answer.

    Returns:
        ``save_filename`` (the Unix timestamp as a string) when ``mode`` is
        ``'product'``; the tuple ``(latex_str, answer)`` when ``mode`` is
        ``'test'``; ``None`` for any other mode.
    """
    original_img, binary_img = read_img_and_convert_to_binary(filename)
    symbols = binary_img_segment(binary_img, original_img)
    sort_symbols = sort_characters(symbols)
    process.detect_uncontinous_symbols(sort_symbols, binary_img)
    # NOTE(review): everything below reads `symbols`, not `sort_symbols`;
    # presumably the two calls above work on shared objects - confirm.

    symbols_to_be_predicted = normalize_matrix_value(
        [x['src_img'] for x in symbols])

    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(symbols_to_be_predicted)}, shuffle=False)

    predictions = cnn_symbol_classifier.predict(input_fn=predict_input_fn)

    characters = []
    for i, p in enumerate(predictions):
        candidates = get_candidates(p['probabilities'])
        characters.append({
            'location': symbols[i]['location'],
            'candidates': candidates
        })

    modify_characters(characters)

    # The returned tokens are not used here; the call is kept in case it
    # updates `characters` in place - TODO confirm.
    process.group_into_tokens(characters)
    # First initialize every token as a tree node, giving the node list.
    node_list = parser.characters_to_nodes(characters)
    parser_tree = parser.decompose(node_list)

    set_forward_step(0)
    # Keep the step-0 rendering: it is the fallback expression when no
    # stepping is possible below.
    initial_latex = post_order(parser_tree)

    y_pos = 0.9
    y_stride = 0.2
    max_steps = 5
    try:
        if parser_tree['status'] == STATUS['solved']:
            # Advance one step at a time while the tree still has a list
            # structure, collecting each rendering.
            latex_strs = []
            while (len(latex_strs) < max_steps
                   and isinstance(parser_tree['structure'], list)):
                set_forward_step(1)
                latex_strs.append(post_order(parser_tree))
            # A tree whose structure is not a list yields no steps; use the
            # step-0 rendering instead of failing on latex_strs[0].
            if not latex_strs:
                latex_strs.append(initial_latex)

            for step, step_latex in enumerate(latex_strs):
                label = 'expression' if step == 0 else 'step' + str(step)
                _draw_latex_line(label, step_latex, y_pos)
                y_pos = y_pos - y_stride
            latex_str = latex_strs[0]
        else:
            set_forward_step(0)
            latex_str = post_order(parser_tree)
            _draw_latex_line('expression', latex_str, y_pos)
            y_pos = y_pos - y_stride

        solution, answer = _describe_result(parser_tree, latex_str)
        print('答案:', solution)
        print('处理结果请到static文件夹下的最新生成的图片查看')
        plt.text(0.1, y_pos, solution, fontsize=18)

        plt.xticks([])
        plt.yticks([])
        # NOTE: second-resolution timestamps collide when two figures are
        # saved within the same second.
        save_filename = str(int(time.time()))
        plt.savefig(SAVE_FOLDER + save_filename)
    finally:
        # Always drop the current figure so a failed run cannot leak its
        # text into the image produced by the next call.
        plt.close()

    if mode == 'product':
        return save_filename
    elif mode == 'test':
        return latex_str, answer
    return None