Esempio n. 1
0
def qu_words_detection(sen):
    """Detect misused quantifier (measure word) characters in a sentence.

    The sentence is segmented, then for each known quantifier key the first
    occurrence in the token list is inspected: when the preceding context
    contains a quantifier-trigger character but the character actually used
    is not one of the valid quantifiers for that key, a detection record
    with a randomly chosen valid quantifier is emitted.

    :param sen: raw sentence string
    :return: list of detection dicts with "location", "word", "correction"
    """
    sen = segmentation.seg(sen, mode=True)
    ans = []
    for w in Quantifier:
        if w not in sen:
            continue
        w_idx = sen.index(w)  # only the first occurrence is inspected
        if w_idx == 0:
            continue  # no preceding token to provide context
        prev = sen[w_idx - 1]
        # Character offset of the start of the preceding token.
        word_len = sum(len(sen[i]) for i in range(w_idx))
        if len(prev) > 1:
            first, second = prev[-2], prev[-1]
            # Trigger character followed by a character that is not a valid
            # quantifier for this key -> flag it.
            if first in QuantifierWords and second not in Quantifier[w]:
                ans.append({
                    "location": word_len,
                    "word": second,
                    "correction": [{
                        "word": random.choice(Quantifier[w]),
                        "score": None
                    }]
                })
        elif w_idx > 1 and sen[w_idx - 2][-1] in QuantifierWords:
            # Single-character token sits between the trigger and the key.
            if len(prev) == 1 and prev not in Quantifier[w]:
                ans.append({
                    "location": word_len,
                    "word": prev,
                    "correction": [{
                        "word": random.choice(Quantifier[w]),
                        "score": None
                    }]
                })
    return ans
Esempio n. 2
0
def sentenceScorer(sentence):
    """Score a sentence with the language model.

    The language model expects its input pre-segmented with words separated
    by spaces, so the sentence is run through the segmenter first.

    :param sentence: raw (unsegmented) sentence string
    :return: language-model score of the space-joined segmentation
    """
    tokens = segmentation.seg(sentence)
    return LmModel.score(" ".join(tokens))
Esempio n. 3
0
def replaceWord(sentence):
    """Detect wrong multi-character words via pinyin confusion sets.

    For every multi-character token whose pinyin has confusable candidates,
    each candidate is substituted into the sentence and scored with the
    language model; if the best candidate beats the original sentence score
    by more than 4, each differing character is reported.

    :param sentence: raw sentence string
    :return: list of detection dicts with "location", "sentence_score",
             "word" and "correction"
    """
    ans = []
    sen_list = segmentation.seg(sentence, mode=True)
    old_score = sentenceScorer(sentence)

    temp_sen_list = sen_list[:]

    for idx in range(len(sen_list)):
        pinyin = PIN.get_pinyin(sen_list[idx], "")
        local_score = -10000
        # Bugfix: these were only bound inside the candidate loop, raising
        # UnboundLocalError when the confusion set for this pinyin is empty.
        replace_word = None
        replace_score = None
        if len(sen_list[idx]) > 1 and pinyin in pinyinConfusionset:
            for candi in pinyinConfusionset[pinyin]:
                temp_sen_list[idx] = candi
                current_score = sentenceScorer("".join(temp_sen_list))
                if current_score > local_score:
                    local_score = current_score
                    replace_word = candi
                    replace_score = current_score

            # Only report when some candidate was found AND it clearly
            # improves on the original sentence score.
            if replace_word is not None and local_score - old_score > 4:
                # Character offset of the start of this token.
                word_len = sum(len(sen_list[i]) for i in range(idx))
                if len(sen_list[idx]) == len(replace_word):
                    for i in range(len(replace_word)):
                        if replace_word[i] != sen_list[idx][i]:
                            detect = {
                                "location": word_len + i + 1,
                                "sentence_score": old_score,
                                "word": sen_list[idx][i],
                                "correction": [{
                                    "word": replace_word[i],
                                    "score": replace_score
                                }]
                            }
                            ans.append(detect)
        # Reset the working copy so the next index starts from the original.
        temp_sen_list = sen_list[:]
    return ans
        data = data[msg_size:]


        #pick_data=pickle.loads(frame_data, fix_imports=True, encoding="bytes")
        pick_data=pickle.loads(frame_data)
        #print(frame)
        #print(np.load(image))
        print(pick_data)
        print(len(pick_data))
        image = cv2.imdecode(pick_data[1], cv2.IMREAD_COLOR)
        #image = cv2.imdecode(pick_data, -1)
        cv2.imwrite("output1.png",image)

        break

    img = seg('/home/lab05/kaggle_dir/project_socket_server/output1.png')
#     print(answer)
    print(img.shape)
    print(type(img))
    
#     img = cv2.imread(answer)
    print("사진크기:{}".format(img.shape))
    encode_param = [int(cv2.IMWRITE_PNG_COMPRESSION), 9]
    img = cv2.imencode('.png', img, encode_param)
    data = pickle.dumps(img, protocol=3)
    # data = pickle.dumps(img, 0)
    size = len(data)
    conn.sendall(struct.pack(">L", size) + data)
    
#     cv2.imwrite("output2.png",answer)
Esempio n. 5
0
                    ha='center',
                    va='bottom')
        else:
            ax.text(rect.get_x() + rect.get_width() / 2.,
                    1.02 * height,
                    '%d' % int(height),
                    ha='center',
                    va='bottom')


# Bucket boundaries for the bar chart: [1,10), [10,100), ... plus an
# open-ended final bucket starting at 100000.
segs = [[1, 10], [10, 100], [100, 1000], [1000, 10000], [10000, 100000],
        [
            100000,
        ]]

# NOTE(review): seg() is called with no arguments and the result is fed to
# segmentation.seg together with the buckets — presumably it yields the raw
# values to be bucketed; verify against the definitions elsewhere in the file.
ret = segmentation.seg(seg(), segs)

N = len(segs)
# NOTE(review): (0) * N is integer multiplication and evaluates to 0, not a
# tuple of N zeros — (0,) * N may have been intended. menStd is unused since
# the rects2 bar below is commented out.
menStd = (0) * N

ind = np.arange(N) * 1  # the x locations for the groups
width = 0.55  # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(ind, ret, width, color='grey', align='center')
#rects2 = ax.bar(ind+1*width, z2, width, color='y', yerr=menStd)

# Axis labels/title are Chinese: y = "number of users (people/times)",
# x = "amount due (RMB)", title = "Kaniu interval statistics".
ax.set_ylabel(u'用户数(人/次)')
ax.set_xlabel(u'应还款额(RMB)')
ax.set_title(u'卡牛区间统计')
ax.set_xticks(ind)
Esempio n. 6
0

def autolabel(rects):
    """Write each bar's height just above it on the module-level axes ``ax``.

    Heights above 1000 are rendered in thousands (e.g. ``12K``); smaller
    heights as a plain integer.
    """
    for bar in rects:
        h = bar.get_height()
        if h > 1000:
            label = '%.0fK' % float(h / 1000.00)
        else:
            label = '%d' % int(h)
        ax.text(bar.get_x() + bar.get_width() / 2., 1.02 * h, label,
                ha='center', va='bottom')

# Bucket boundaries for the bar chart: [1,10), [10,100), ... plus an
# open-ended final bucket starting at 100000.
segs = [[1,10],[10,100],[100,1000],[1000,10000],[10000,100000],[100000,]]

# NOTE(review): seg() is called with no arguments and the result is fed to
# segmentation.seg together with the buckets — presumably it yields the raw
# values to be bucketed; verify against the definitions elsewhere in the file.
ret = segmentation.seg(seg(),segs)

N = len(segs)
# NOTE(review): (0)*N is integer multiplication and evaluates to 0, not a
# tuple of N zeros — (0,)*N may have been intended. menStd is unused since
# the rects2 bar below is commented out.
menStd =   (0)*N
 
ind = np.arange(N)*1  # the x locations for the groups
width = 0.55       # the width of the bars
 
fig, ax = plt.subplots()
rects1 = ax.bar(ind, ret, width, color='grey',align='center')
#rects2 = ax.bar(ind+1*width, z2, width, color='y', yerr=menStd)
 
# Axis labels/title are Chinese: y = "number of users (people/times)",
# x = "amount due (RMB)", title = "Kaniu interval statistics".
ax.set_ylabel(u'用户数(人/次)')
ax.set_xlabel(u'应还款额(RMB)')
ax.set_title(u'卡牛区间统计')
ax.set_xticks(ind)
Esempio n. 7
0
def replaceOneCharacter(idx, sen_list, sentence):
    """Try confusion-set substitutes for the single character sen_list[idx].

    Each candidate from Confusionset[word] is substituted into the sentence
    (and, when the same character re-occurs within the next few tokens, into
    both positions at once) and scored with the language model.

    :param idx: token index of the character under test
    :param sen_list: segmented sentence (list of tokens)
    :param sentence: the original raw sentence string
    :return: tuple (replacement_ch, word, candi_top, old_score, double_check,
             word_deviation, max_score) when a clearly better replacement is
             found, otherwise None
    """
    # +3.7 is the margin a candidate must beat over the original sentence.
    old_score = sentenceScorer(sentence) + 3.7
    double_check = False

    word = sen_list[idx]
    word_next_dict = {}
    word_deviation = 0

    ## To see whether there contains same words in the following three words
    if idx < len(sen_list) - 1:
        temp_count = 1
        check_len = 0  # this variable is used to record the number of words
        while temp_count < 4 and (
                idx + temp_count) < len(sen_list) - 1 and check_len <= 4:
            check_len += len(sen_list[idx + temp_count])
            temp_count += 1
            # NOTE(review): temp_count is incremented before this lookup, so
            # sen_list[idx + 1] is never recorded — confirm the skip is
            # intentional. (The bounds check above keeps the index valid.)
            word_next_dict[sen_list[idx + temp_count]] = temp_count

    for word_next in word_next_dict:
        if word == word_next:
            double_check = True
            word_deviation = word_next_dict[word_next]
            break

    candis = dict()
    try:
        candidates = Confusionset[word]
    except KeyError:
        # Bugfix: was a bare `except:` that silently swallowed every error;
        # only a missing confusion-set entry is expected here.
        return None

    max_score = -100000
    replacement_ch = ""
    for ch in candidates:
        temp_list = sen_list[:]
        temp_list[idx] = ch
        if double_check:
            # Replace both occurrences of the doubled character together.
            temp_list[idx + word_deviation] = ch

        new_score = sentenceScorer("".join(temp_list))

        # Bonus when the replacement merges tokens: fewer segments usually
        # indicates a more fluent sentence.
        if len(segmentation.seg(sentence)) > len(
                segmentation.seg("".join(temp_list))):
            new_score += 5.4

        # Penalize near-ties with the original so marginal edits are dropped.
        if abs(new_score - old_score) <= 1:
            new_score -= 4

        if new_score > old_score:
            # NOTE(review): keyed by score, so equal-scoring candidates
            # overwrite each other.
            candis[new_score] = ch

        if new_score > max_score:
            max_score = new_score
            replacement_ch = ch

    if max_score > old_score:
        # Keep only the single best-scoring candidate for the report.
        candi_top = [{"word": candis[s], "score": s}
                     for s in sorted(candis, reverse=True)[:1]]
        return replacement_ch, word, candi_top, old_score, double_check, word_deviation, max_score

    return None
Esempio n. 8
0
def detectSentence(sentence):
    """Run every detector over *sentence* and aggregate the findings.

    Combines: single-character confusion-set replacement (with a
    double-occurrence check that replaces both occurrences at once),
    TeacherWord character corrections, quantifier misuse detection
    (qu_words_detection) and pinyin-based word replacement (replaceWord).

    :param sentence: raw sentence string
    :return: dict with an "error" list of detections plus a "time_usage"
             entry ("%.4f" seconds spent in the character-level pass)
    """
    t1 = time.time()
    final_result = dict()
    sen_list = segmentation.seg(sentence)
    temp_sen_list = sen_list[:]

    error = {"error": []}
    local_res = None  # this variable is used for keeping the detected result
    skip_index = []  # token indices already covered by a double-check result
    location = 1  # 1-based character position of the current token

    for idx in range(len(temp_sen_list)):
        # Since we use the segmentation tool, the character location has to
        # be accumulated from the preceding token lengths.
        if idx == 0:
            location = 1
        else:
            location += len(temp_sen_list[idx - 1])

        word = temp_sen_list[idx]

        # Bugfix: this previously tested `index`, a constant 0 that was never
        # updated, instead of the loop variable — so double-checked tokens
        # were never actually skipped.
        if idx in skip_index:  # If double check is True, we will not continue to detect the following 3 words
            continue

        # NOTE(review): the isPunc(word) test selects single characters for
        # which isPunc is truthy — confirm the predicate is not meant to be
        # negated here.
        if len(word) == 1 and isPunc(
                word) and word not in SkipWord and word not in SingleWord:
            if LmModel.score(word) <= -6.2:
                ans = replaceOneCharacter(idx, temp_sen_list, sentence)
                # Keep only the highest-scoring pending detection.
                if ans is not None and (local_res is None
                                        or ans[-1] > local_res[-1]):
                    error_location = location
                    local_res = copy.deepcopy(ans)
                    error_idx = idx

        else:
            # Flush the pending single-character detection before handling a
            # multi-character token.
            if local_res is not None:
                detect_word = local_res[1]
                tops = local_res[2]
                double_check = local_res[4]
                word_deviation = local_res[5]
                old_score = local_res[3]
                if not double_check:
                    detect = {
                        "location": error_location,
                        "sentence_score": old_score,
                        "word": detect_word,
                        "correction": tops
                    }
                    error["error"].append(detect)
                else:
                    # Report both occurrences of the doubled character.
                    detect1 = {
                        "location": error_location,
                        "sentence_score": old_score,
                        "word": detect_word,
                        "correction": tops
                    }
                    detect2 = {
                        "location":
                        error_location + sum([
                            len(temp_sen_list[k])
                            for k in range(error_idx, error_idx +
                                           word_deviation)
                        ]),
                        "sentence_score":
                        old_score,
                        "word":
                        detect_word,
                        "correction":
                        tops
                    }
                    error["error"].append(detect1)
                    error["error"].append(detect2)

                    # Tokens up to the second occurrence are already handled.
                    skip_index.extend([
                        k for k in range(error_idx, error_idx + word_deviation)
                    ])
                local_res = None

            if word in TeacherWord:
                temp_location = location
                for i in range(len(word)):
                    # NOTE(review): `temp_location += i` accumulates offsets
                    # 0, 1, 3, 6, ... so for characters beyond the second the
                    # reported location drifts; `location + i` may have been
                    # intended. Left unchanged pending confirmation.
                    temp_location += i
                    if word[i] != TeacherWord[word[i]]:
                        detect = {
                            "location":
                            temp_location,
                            "word":
                            word[i],
                            "correction": [{
                                "word": TeacherWord[word[i]],
                                "score": None
                            }]
                        }
                        error["error"].append(detect)

    # Flush a detection left pending at the end of the sentence.
    if local_res is not None:
        detect_word = local_res[1]
        tops = local_res[2]
        double_check = local_res[4]
        word_deviation = local_res[5]
        old_score = local_res[3]
        if not double_check:
            detect = {
                "location": error_location,
                "sentence_score": old_score,
                "word": detect_word,
                "correction": tops
            }
            error["error"].append(detect)
        else:
            detect1 = {
                "location": error_location,
                "sentence_score": old_score,
                "word": detect_word,
                "correction": tops
            }
            detect2 = {
                "location":
                error_location + sum([
                    len(temp_sen_list[k])
                    for k in range(error_idx, error_idx + word_deviation)
                ]),
                "sentence_score":
                old_score,
                "word":
                detect_word,
                "correction":
                tops
            }
            error["error"].append(detect1)
            error["error"].append(detect2)

    t2 = time.time()
    qu_res = qu_words_detection(sentence)

    if qu_res:
        error["error"].extend(qu_res)

    word_res = replaceWord(sentence)
    if len(word_res) > 0:
        error["error"].extend(word_res)

    time_usage = {"time_usage": "%.4f" % (t2 - t1)}
    final_result = dict(final_result, **error)
    final_result = dict(final_result, **time_usage)

    return final_result