def search_mdx_sug(dic_pk, query, group, flag):
    """Collect lookup suggestions for ``query`` from the MDX dictionaries.

    When ``dic_pk`` is -1 the lookup is fanned out over a worker pool chosen
    by ``check_system()``: the process pool for 0, the thread pool for 1. One
    task is submitted per configured ``process_num`` slot. In every other
    case (a specific dictionary, or an unrecognised ``check_system()`` value)
    the suggestions come from ``loop_search_sug``.

    Args:
        dic_pk: Identifier of a single dictionary, or -1 to go through the
            worker pools.
        query: Text to find suggestions for.
        group: Group value forwarded to the workers / ``loop_search_sug``.
        flag: Forwarded unchanged to ``loop_search_sug``; the pooled paths do
            not use it.

    Returns:
        A new list holding the suggestions of every batch, in batch order.
    """
    global pool, thpool

    use_pools = dic_pk == -1
    if check_system() == 0 and use_pools:
        worker_count = get_config_con('process_num')
        pool = check_pool_recreate(pool)
        tasks = [(slot, query, group) for slot in range(worker_count)]
        batches = pool.starmap(multiprocess_search_sug, tasks)
    elif check_system() == 1 and use_pools:
        worker_count = get_config_con('process_num')
        thpool = check_threadpool_recreate(thpool)
        tasks = [(slot, query, group) for slot in range(worker_count)]
        batches = thpool.starmap(multithread_search_sug, tasks)
    else:
        # Suggestions for one single dictionary.
        batches = [loop_search_sug(dic_pk, query, flag, group)]

    return [entry for batch in batches for entry in batch]
def search(query, is_en, group):
    """Look up ``query`` and post-process the hits according to the config.

    The stripped query is searched with ``search_mdx_dic``. Two failures are
    recovered from with exactly one retry:

    * ``FileNotFoundError``: the dictionary list is rebuilt through
      ``init_mdict_list(True)``.
    * ``OperationalError``: the models are recreated with
      ``loop_create_model`` (``check_system() == 0``) or
      ``loop_create_thread_model`` (``check_system() == 1``).

    Afterwards the configured post-processing steps run in this order:
    entry merging, built-in dictionary, lemmatization, spell check. For
    ``lemmatize`` and ``spell_check`` the value 1 means "always run" and 2
    means "run only when there are no records yet"; any other value skips
    the step.

    Args:
        query: Raw query text; surrounding whitespace is stripped.
        is_en: Forwarded to the built-in dictionary, lemmatizer and spell
            checker (presumably whether the query is English - confirm
            against the caller).
        group: Group value forwarded to ``search_mdx_dic``.

    Returns:
        The record list produced by the search and post-processing steps.

    Raises:
        Whatever the retry raises if the recovery did not help.
    """
    query = query.strip()
    record_list = []

    try:
        record_list = search_mdx_dic(query, record_list, group)
    except FileNotFoundError:
        print_log_info(
            'mdx file not found, mdx search failed, need recache!',
            DebugLevel.error)
        # Regenerate the cache file, then retry once.
        init_mdict_list(True)
        record_list = search_mdx_dic(query, record_list, group)
    except OperationalError as e:
        print(e)
        print_log_info('modify database failed!', DebugLevel.error)
        system = check_system()
        if system == 0:
            loop_create_model()
        elif system == 1:
            loop_create_thread_model()
        record_list = search_mdx_dic(query, record_list, group)

    # Read every switch up front, in a fixed order, before any
    # post-processing step runs.
    merge_entry_enable = get_config_con('merge_entry_enable')
    builtin_dic_enable = get_config_con('builtin_dic_enable')
    spell_check = get_config_con('spell_check')
    lemmatize = get_config_con('lemmatize')

    if merge_entry_enable:
        record_list = merge_record(query, record_list)

    if builtin_dic_enable:
        record_list = search_bultin_dic(query, record_list, is_en)

    # Mode 1: always; mode 2: only as a fallback when nothing was found.
    if lemmatize == 1 or (lemmatize == 2 and not record_list):
        record_list = lemmatize_func(query, record_list, is_en)

    if spell_check == 1 or (spell_check == 2 and not record_list):
        record_list = key_spellcheck(query, record_list, is_en)

    return record_list
def search_mdx_dic(query, record_list, group):
    """Search the MDX dictionaries for ``query`` and append the hits.

    The work is split into ``process_num`` tasks (read from the config) and
    run on the process pool when ``check_system()`` returns 0, otherwise on
    the thread pool. The pool in use is first passed through its
    ``check_*_recreate`` helper and the module-level reference is updated
    with the result.

    Args:
        query: Text to search for.
        record_list: List that receives the results; it is extended in place.
        group: Group value forwarded to every worker.

    Returns:
        The same ``record_list`` object, after all batches were appended.
    """
    global pool, thpool

    # Pick the executor and its matching worker function.
    if check_system() == 0:
        pool = check_pool_recreate(pool)
        executor, worker = pool, multiprocess_search_mdx
    else:
        thpool = check_threadpool_recreate(thpool)
        executor, worker = thpool, multithread_search_mdx

    worker_count = get_config_con('process_num')
    tasks = [(slot, query, group) for slot in range(worker_count)]
    for batch in executor.starmap(worker, tasks):
        record_list.extend(batch)
    return record_list
def _order_suggestions(sug, query, limit):
    """Lower-case, de-duplicate, sort and truncate raw suggestions.

    If at least one suggestion starts with the lower-cased ``query``, the
    sorted list is rotated so that the first such entry leads. Example from
    the original code: when looking up a word starting with "性", plain
    sorting would put words starting with "修" first, so the matching prefix
    is shown first instead.
    """
    ordered = sorted({s.lower() for s in sug})
    needle = query.lower()
    start = next(
        (i for i, s in enumerate(ordered) if s.startswith(needle)), 0)
    return (ordered[start:] + ordered[:start])[:limit]


def _search_mdx_sug_with_retry(dic_pk, query, group, flag):
    """Call ``search_mdx_sug``; on a known failure repair state, retry once."""
    try:
        return search_mdx_sug(dic_pk, query, group, flag)
    except FileNotFoundError:
        print_log_info(
            'mdx file not found, suggestion search failed, need recache!', 2)
        init_mdict_list(True)
        return search_mdx_sug(dic_pk, query, group, flag)
    except OperationalError as e:
        print(e)
        print_log_info('modify database failed!', 2)
        # Multi-process reads/writes to sqlite failed, so the database is
        # created with a plain loop instead. When several new dictionaries
        # are added, "django.db.utils.OperationalError: disk I/O error" is
        # sometimes raised, possibly because sqlite locking is unreliable on
        # NFS file systems.
        system = check_system()
        if system == 0:
            loop_create_model()
        elif system == 1:
            loop_create_thread_model()
        return search_mdx_sug(dic_pk, query, group, flag)


def search_suggestion(request):
    """Return the query suggestions for a request as a JSON list.

    GET parameters read:
        query / term: Text to complete (``term`` is what jQuery UI's
            autocomplete sends; it is used when ``query`` is empty).
        dic_pk: Dictionary to search, -1 (default) when none is given.
        flag: Maximum number of suggestions returned (default 20).
        dic_group: Group value, default 0.

    Results are cached in ``sug_cache`` under (query, group, dic_pk). On a
    cache miss the suggestions are gathered from:

    * the built-in dictionary (only when ``dic_pk`` is -1),
    * the MDX search for the query itself,
    * for queries ``is_en_func`` rejects: the ``t2s``-converted query if it
      differs from the query, otherwise the ``s2t``-converted one, plus the
      ``hc.reverse_query`` results of that variant when it is longer than
      one character,
    * the ``strQ2B``-normalised query when it differs from the query.

    An empty query skips every MDX search. Invalid integers in ``dic_pk``,
    ``flag`` or ``dic_group`` raise ``ValueError``.

    Returns:
        An ``HttpResponse`` whose body is the JSON-encoded suggestion list.
    """
    query = request.GET.get('query', '')
    dic_pk = int(request.GET.get('dic_pk', -1))
    if query == '':
        # jQuery UI's drop-down sends the text as "term".
        query = request.GET.get('term', '')
    # GET values are strings; the limit is used as a slice bound, so it must
    # be an int (a client-supplied "flag" used to raise TypeError).
    flag = int(request.GET.get('flag', 20))
    group = int(request.GET.get('dic_group', 0))

    r_list = sug_cache.get(query, group, dic_pk)
    if r_list is None:
        sug = []
        if dic_pk == -1:
            # Only the index page needs built-in dictionary suggestions.
            # Copy so the helper's list is never extended in place.
            sug = list(search_bultin_dic_sug(query))

        if query != '':
            sug.extend(_search_mdx_sug_with_retry(dic_pk, query, group, flag))

            if not is_en_func(query):
                # Traditional/simplified conversion: prefer the t2s form when
                # it differs from the query, otherwise try the s2t form.
                q_s = t2s.convert(query)
                q_t = s2t.convert(query)
                variant = q_s if q_s != query else q_t
                sug.extend(search_mdx_sug(dic_pk, variant, group, flag))
                if len(variant) > 1:
                    # Suggestions for the character-decomposition reverse
                    # lookup.
                    for part in hc.reverse_query(variant) or ():
                        sug.extend(search_mdx_sug(dic_pk, part, group, flag))

            # NOTE(review): strQ2B presumably maps full-width characters to
            # half-width ones - confirm in base.base_func.
            q2b = strQ2B(query)
            if q2b != query:
                sug.extend(search_mdx_sug(dic_pk, q2b, group, flag))

        sug_cache.put(
            query, group, dic_pk, _order_suggestions(sug, query, flag))
        r_list = sug_cache.get(query, group, dic_pk)

    return HttpResponse(json.dumps(r_list))
from base.base_func import print_log_info, is_en_func, strQ2B, request_body_serialze, guess_mime from base.base_func3 import t2s, s2t from mdict.mdict_utils.chaizi_reverse import HanziChaizi from mdict.mdict_utils.data_utils import get_or_create_dic from mdict.mdict_utils.decorator import loop_mdict_list, inner_object from mdict.mdict_utils.init_utils import init_vars, sound_list, init_mdict_list from mdict.mdict_utils.mdict_config import * from mdict.mdict_utils.search_object import SearchObject from mdict.mdict_utils.search_utils import search, clear_duplication, search_bultin_dic_sug, search_mdx_sug from base.sys_utils import check_system from .models import MdictDic, MyMdictEntry, MdictDicGroup, MdictOnline from .serializers import MdictEntrySerializer, MyMdictEntrySerializer, MdictOnlineSerializer from .mdict_utils.mdict_func import mdict_root_path, is_local, get_m_path if check_system() == 0: from .mdict_utils.multiprocess_search import loop_create_model elif check_system() == 1: from .mdict_utils.multithread_search import loop_create_thread_model from .mdict_utils.search_cache import sug_cache, MdictPage, key_paginator from .mdict_utils.init_database import init_database init_database() class MdictEntryViewSet(viewsets.ViewSet): authentication_classes = [] permission_classes = []
import configparser import os from mysite.settings import BASE_DIR from base.sys_utils import check_system # 分两个config,因为windows下创建的config,在wsl下报permissionerror。 if check_system() == 0: user_config_path = os.path.join(BASE_DIR, 'config_lin.ini') else: user_config_path = os.path.join(BASE_DIR, 'config_win.ini') default_config = { 'COMMON': { 'process_num': 4, # 默认进程数 'cache_num': 30, # 查询提示缓存的个数 'search_cache_num': 20, # 查询(分页)缓存的个数 'builtin_dic_enable': True, # 启用内置词典 }, 'SEARCH': { 'spell_check': 2, 'lemmatize': 2, 'merge_entry_enable': True, 'merge_entry_num': 5, # 全局设置,同一个词典一个词条的条数多于等于5个时,将合并为1个。 'merge_entry_max_length': 500, 'st_enable': True, # 繁简和简繁转化 'chaizi_enable': True, # 拆字反查 'fh_char_enable': True, # 全角转换 'force_refresh': False, # 强制刷新 'select_btn_enable': True, # 是否启用选择文字弹出框 'suggestion_number': 15, # 查询提示显示的数目