Ejemplo n.º 1
0
def search_mdx_sug(dic_pk, query, group, flag):
    """Collect query suggestions from the mdx dictionaries.

    When ``dic_pk`` is -1 and ``check_system()`` reports 0 or 1, the lookup
    is fanned out over ``process_num`` workers: the process pool for system
    0, the thread pool for system 1.  In every other case the suggestions
    come from ``loop_search_sug`` for the dictionary identified by
    ``dic_pk``.

    Args:
        dic_pk: dictionary primary key; -1 selects the pooled search.
        query: text to find suggestions for.
        group: dictionary group forwarded to the workers.
        flag: forwarded to ``loop_search_sug`` only; the pooled workers do
            not receive it.

    Returns:
        A new list with the suggestions of all workers concatenated in
        worker order.
    """
    global pool, thpool

    system = check_system()
    if dic_pk == -1 and system in (0, 1):
        worker_count = get_config_con('process_num')

        # The pool object may be replaced by its recreate helper, so the
        # module-level reference is refreshed before it is used.
        if system == 0:
            pool = check_pool_recreate(pool)
            executor, worker = pool, multiprocess_search_sug
        else:
            thpool = check_threadpool_recreate(thpool)
            executor, worker = thpool, multithread_search_sug

        tasks = ((idx, query, group) for idx in range(worker_count))
        return [
            item
            for partial in executor.starmap(worker, tasks)
            for item in partial
        ]

    # Suggestions for a single dictionary.
    return list(loop_search_sug(dic_pk, query, flag, group))
Ejemplo n.º 2
0
def search(query, is_en, group):
    """Search the mdx dictionaries for *query* and post-process the hits.

    The raw lookup is done by ``search_mdx_dic``.  Two failures are handled
    by repairing state and retrying the lookup once:

    * ``FileNotFoundError`` - ``init_mdict_list(True)`` is called first.
    * ``OperationalError`` - the models are recreated with the helper that
      matches ``check_system()``.

    Afterwards the configured post-processing steps run in this order:
    entry merging, built-in dictionary, lemmatization, spell check.  For
    ``lemmatize`` and ``spell_check`` the value 1 means "always" and 2 means
    "only when nothing has been found so far".

    Args:
        query: the search text; surrounding whitespace is stripped.
        is_en: forwarded unchanged to the built-in dictionary,
            lemmatization and spell-check helpers.
        group: dictionary group forwarded to ``search_mdx_dic``.

    Returns:
        The list of records produced by the lookup and post-processing.
    """
    record_list = []
    query = query.strip()
    try:
        record_list = search_mdx_dic(query, record_list, group)
    except FileNotFoundError:
        print_log_info('mdx file not found, mdx search failed, need recache!',
                       DebugLevel.error)
        # NOTE(review): presumably rebuilds the cached dictionary list
        # before the retry - confirm against init_mdict_list.
        init_mdict_list(True)
        record_list = search_mdx_dic(query, record_list, group)
    except OperationalError as e:
        print(e)
        print_log_info('modify database failed!', DebugLevel.error)
        if check_system() == 0:
            loop_create_model()
        elif check_system() == 1:
            loop_create_thread_model()
        record_list = search_mdx_dic(query, record_list, group)

    merge_entry_enable = get_config_con('merge_entry_enable')
    builtin_dic_enable = get_config_con('builtin_dic_enable')
    spell_check = get_config_con('spell_check')
    lemmatize = get_config_con('lemmatize')

    if merge_entry_enable:
        record_list = merge_record(query, record_list)

    if builtin_dic_enable:
        record_list = search_bultin_dic(query, record_list, is_en)

    # Mode 1: always run; mode 2: only as a fallback for an empty result.
    if lemmatize == 1 or (lemmatize == 2 and not record_list):
        record_list = lemmatize_func(query, record_list, is_en)

    if spell_check == 1 or (spell_check == 2 and not record_list):
        record_list = key_spellcheck(query, record_list, is_en)

    return record_list
Ejemplo n.º 3
0
def search_mdx_dic(query, record_list, group):
    """Look up *query* in the mdx dictionaries using the worker pool.

    The work is split over ``process_num`` workers.  When ``check_system()``
    returns 0 the process pool is used, otherwise the thread pool.

    Args:
        query: text to look up.
        record_list: list that receives the results; it is extended in
            place.
        group: dictionary group forwarded to the workers.

    Returns:
        The same ``record_list`` object, with every worker's results
        appended in worker order.
    """
    global pool, thpool

    # Refresh the module-level pool reference, then pick the worker
    # function that belongs to it.
    if check_system() == 0:
        pool = check_pool_recreate(pool)
        executor, worker = pool, multiprocess_search_mdx
    else:
        thpool = check_threadpool_recreate(thpool)
        executor, worker = thpool, multithread_search_mdx

    worker_count = get_config_con('process_num')
    tasks = ((idx, query, group) for idx in range(worker_count))
    for partial in executor.starmap(worker, tasks):
        record_list.extend(partial)

    return record_list
Ejemplo n.º 4
0
def _search_mdx_sug_with_recovery(dic_pk, query, group, flag):
    """Run ``search_mdx_sug``; on a known failure repair state and retry once."""
    try:
        return search_mdx_sug(dic_pk, query, group, flag)
    except FileNotFoundError:
        print_log_info(
            'mdx file not found, suggestion search failed, need recache!',
            2)
        init_mdict_list(True)
    except OperationalError as e:
        print(e)
        print_log_info('modify database failed!', 2)
        # Multi-process reads/writes to sqlite failed, so the database is
        # created with a plain loop instead.  When several new dictionaries
        # are added, django.db.utils.OperationalError: disk I/O error is
        # sometimes raised, possibly because sqlite locking is unreliable
        # on NFS file systems.
        if check_system() == 0:
            loop_create_model()
        elif check_system() == 1:
            loop_create_thread_model()
    return search_mdx_sug(dic_pk, query, group, flag)


def search_suggestion(request):
    """Return query suggestions as a JSON array.

    GET parameters read from ``request``:
        query: the text to complete; when empty, ``term`` is used instead.
        dic_pk: dictionary primary key, default -1.
        flag: maximum number of suggestions returned, default 20.
        dic_group: dictionary group, default 0.

    Results are cached in ``sug_cache`` under (query, group, dic_pk).  On a
    cache miss the suggestions are gathered from the built-in dictionary
    (only when ``dic_pk`` is -1) and from the mdx dictionaries.  For
    queries that ``is_en_func`` rejects, the simplified/traditional variant
    and the results of ``hc.reverse_query`` on it are searched as well,
    followed by the ``strQ2B`` form of the query when it differs.  The
    merged suggestions are lower-cased, de-duplicated, sorted and rotated
    so that entries starting with the query come first.

    Returns:
        An ``HttpResponse`` whose body is the JSON-encoded suggestion list.

    Raises:
        ValueError: if ``dic_pk``, ``flag`` or ``dic_group`` is not an
            integer string.
    """
    query = request.GET.get('query', '')
    dic_pk = int(request.GET.get('dic_pk', -1))

    if query == '':  # the jquery-ui dropdown sends the text as 'term'
        query = request.GET.get('term', '')
    # GET values arrive as strings; without int() a client-supplied flag
    # would make the slice below raise TypeError.
    flag = int(request.GET.get('flag', 20))
    group = int(request.GET.get('dic_group', 0))

    # NOTE(review): the cache key does not include flag, so a cached list
    # may have been built for a different limit - confirm this is intended.
    r_list = sug_cache.get(query, group, dic_pk)
    if r_list is None:
        sug = []
        if dic_pk == -1:  # only the index page needs built-in suggestions
            sug = search_bultin_dic_sug(query)

        if query != '':
            sug.extend(
                _search_mdx_sug_with_recovery(dic_pk, query, group, flag))

            if not is_en_func(query):  # simplified/traditional conversion
                q_s = t2s.convert(query)
                # Search the simplified form when it differs from the
                # query, otherwise the traditional form.
                variant = q_s if q_s != query else s2t.convert(query)
                sug.extend(search_mdx_sug(dic_pk, variant, group, flag))
                if len(variant) > 1:
                    # Suggestions from the character-decomposition
                    # reverse lookup.
                    for r in hc.reverse_query(variant) or ():
                        sug.extend(search_mdx_sug(dic_pk, r, group, flag))

            q2b = strQ2B(query)
            if q2b != query:
                sug.extend(search_mdx_sug(dic_pk, q2b, group, flag))

        # Case-insensitive de-duplication followed by alphabetical order.
        return_sug = sorted({s.lower() for s in sug})

        # Plain sorting can put unrelated entries in front (e.g. for the
        # query 性, words starting with 修 sort first), so when entries
        # starting with the query exist the list is rotated to begin at
        # the first of them.
        needle = query.lower()
        first = next(
            (i for i, s in enumerate(return_sug) if s.startswith(needle)),
            0)
        r_list = (return_sug[first:] + return_sug[:first])[:flag]
        # Serve the freshly built list directly instead of re-reading it
        # from the cache after storing it.
        sug_cache.put(query, group, dic_pk, r_list)

    return HttpResponse(json.dumps(r_list))
Ejemplo n.º 5
0
from base.base_func import print_log_info, is_en_func, strQ2B, request_body_serialze, guess_mime
from base.base_func3 import t2s, s2t

from mdict.mdict_utils.chaizi_reverse import HanziChaizi
from mdict.mdict_utils.data_utils import get_or_create_dic
from mdict.mdict_utils.decorator import loop_mdict_list, inner_object
from mdict.mdict_utils.init_utils import init_vars, sound_list, init_mdict_list
from mdict.mdict_utils.mdict_config import *
from mdict.mdict_utils.search_object import SearchObject
from mdict.mdict_utils.search_utils import search, clear_duplication, search_bultin_dic_sug, search_mdx_sug
from base.sys_utils import check_system
from .models import MdictDic, MyMdictEntry, MdictDicGroup, MdictOnline
from .serializers import MdictEntrySerializer, MyMdictEntrySerializer, MdictOnlineSerializer
from .mdict_utils.mdict_func import mdict_root_path, is_local, get_m_path

# Import only the model-creation helper that matches the current platform:
# check_system() == 0 selects the multiprocess variant, 1 the multithread one.
# NOTE(review): for any other return value neither name is bound - confirm
# that check_system() can only return 0 or 1.
if check_system() == 0:
    from .mdict_utils.multiprocess_search import loop_create_model
elif check_system() == 1:
    from .mdict_utils.multithread_search import loop_create_thread_model

from .mdict_utils.search_cache import sug_cache, MdictPage, key_paginator

from .mdict_utils.init_database import init_database

# Executed at import time, i.e. as soon as this module is first loaded.
init_database()


# NOTE(review): presumably a Django REST framework ViewSet; if so, the empty
# lists below switch off authentication and permission checks for this
# endpoint - confirm that it is meant to be publicly accessible.
class MdictEntryViewSet(viewsets.ViewSet):
    authentication_classes = []  # no authentication classes configured
    permission_classes = []  # no permission classes configured
Ejemplo n.º 6
0
import configparser
import os

from mysite.settings import BASE_DIR
from base.sys_utils import check_system

# Two separate config files are kept because a config created under Windows
# raises PermissionError when accessed from WSL.  check_system() == 0 selects
# 'config_lin.ini'; every other value selects 'config_win.ini'.
user_config_path = os.path.join(
    BASE_DIR,
    'config_lin.ini' if check_system() == 0 else 'config_win.ini',
)

default_config = {
    'COMMON': {
        'process_num': 4,  # 默认进程数
        'cache_num': 30,  # 查询提示缓存的个数
        'search_cache_num': 20,  # 查询(分页)缓存的个数
        'builtin_dic_enable': True,  # 启用内置词典
    },
    'SEARCH': {
        'spell_check': 2,
        'lemmatize': 2,
        'merge_entry_enable': True,
        'merge_entry_num': 5,  # 全局设置,同一个词典一个词条的条数多于等于5个时,将合并为1个。
        'merge_entry_max_length': 500,
        'st_enable': True,  # 繁简和简繁转化
        'chaizi_enable': True,  # 拆字反查
        'fh_char_enable': True,  # 全角转换
        'force_refresh': False,  # 强制刷新
        'select_btn_enable': True,  # 是否启用选择文字弹出框
        'suggestion_number': 15,  # 查询提示显示的数目