def index_one(k, item):
    """Add one entry to the search index.

    A new ``tkitSearch.Search`` instance is created and a single document
    built from ``item`` is passed to its ``add`` method.

    Args:
        k: Value stored under the document's ``'path'`` key.
        item: Mapping read with ``.get``; the ``"kg"`` value is joined with
            commas to form the title and the ``"sentence"`` value becomes
            the content.
    """
    searcher = tkitSearch.Search()
    document = {
        'title': ",".join(item.get("kg")),
        'content': item.get("sentence"),
        'path': k,
    }
    searcher.add([document])
def run_index_task():
    """Add every eligible record from ``kg.recheck_all()`` to the search index.

    A record is eligible when its ``label`` is 1 or 2 and its ``state`` is
    either absent (``None``) or one of ``'1'`` / ``'2'``.  The number of
    records indexed so far is printed each time it is a multiple of 1000.
    """
    searcher = tkitSearch.Search()
    wanted_labels = [1, 2]
    wanted_states = ['1', '2']
    indexed = 0
    for path, record in kg.recheck_all():
        record_state = record.get("state")
        # Guard clauses: both halves of the original condition require the
        # label to be wanted, and then accept a missing or wanted state.
        if not (record_state is None or record_state in wanted_states):
            continue
        if record.get("label") not in wanted_labels:
            continue
        if indexed % 1000 == 0:
            print(indexed)
        indexed += 1
        document = {
            'title': ",".join(record.get("kg")),
            'content': record.get("sentence"),
            'path': path,
        }
        searcher.add([document])
def kg_list():
    """List knowledge-graph marks filtered by label / state / keyword.

    Query-string parameters (all read from ``request.args``):
        keyword: When non-empty, records are additionally filtered on
            ``kg == keyword`` and get a fixed placeholder prediction
            instead of a call to ``pre``.
        start:   Only echoed to stdout and passed to the template.
        label:   ``"all"`` or a value accepted by ``int``; defaults to 2.
        type:    Read into ``tp``; not used by this function itself.
        state:   State to query for; defaults to ``"2"``.
        check:   Any supplied value (including ``''``) selects records with
            ``check == True``; when absent the query uses ``check == None``.
        limit:   Maximum number of records fetched; defaults to 20.

    Side effects:
        Every record that is processed is updated with ``check = True`` and
        ``state = '2'`` and saved through ``kg.mark_sentence``.

    Returns:
        The rendered ``list.html`` template when at least one record was
        collected, otherwise the plain string ``"没有数据"``.

    Raises:
        ValueError: If ``label`` (other than ``"all"``) or ``limit`` cannot
            be converted with ``int``.
    """
    # NOTE: the template receives ``**locals()``, so every local name below is
    # part of the template context.  Existing names are kept on purpose, even
    # those that look unused here (``tt``, ``tp``, ``jump``, ``list_start``).
    # NOTE(review): ``tt`` and ``ss`` are never used in this function; they are
    # still constructed because their constructors' side effects are not
    # visible from here -- confirm and remove if they are inert.
    tt = tkitText.Text()
    i = 0  # number of records appended to ``items``
    items = []
    ss = tkitSearch.Search()

    keyword = request.args.get('keyword')
    start = request.args.get('start')
    label = request.args.get('label')
    tp = request.args.get('type')
    state = request.args.get('state')
    check = request.args.get('check')
    # Any supplied value, including the empty string, means "checked".
    if check is not None:
        check = True
    limit = request.args.get('limit')
    if limit is None:
        limit = 20

    print("start", start)
    print(state)
    if state is None:
        state = "2"
    if label is None or len(label) == 0:
        label = 2

    # "all" must not be passed through int(): that raised ValueError and made
    # the explicit "all" handling below unreachable.  For "all" the label
    # filter is simply left out of the query.
    all_labels = label == "all"
    label_value = None if all_labels else int(label)

    if keyword is None or len(keyword) == 0:
        print("no kw")
        jump = ["目", "是", '市镇']
        q = {'check': check, "state": state}
        if not all_labels:
            q['label'] = label_value
        print('q', q)
        for item in DB.kg_mark.find(q).limit(int(limit)):
            k = item['_id']
            print(item)
            print('选择', item)
            p, pr = pre(item)
            item['pre'] = pr
            item['ai'] = p
            # Auto-save progress: the record is marked as checked.
            item['check'] = True
            item['state'] = '2'
            kg.mark_sentence(k, item)
            item['sentence_mark'] = _kg_list_highlight(
                item['sentence'], item['kg'])
            if all_labels or item.get('label') == label_value:
                items.append((k, item))
                i = i + 1
                list_start = k
    else:
        q = {'check': check, "kg": keyword, "state": state}
        if not all_labels:
            q['label'] = label_value
        print('q', q)
        for item in DB.kg_mark.find(q).limit(int(limit)):
            k = item['_id']
            if (item.get('kg') is not None and item.get('state') == state
                    and item.get('check') == check):
                # Placeholder prediction; ``pre`` is not called on this path.
                item['pre'] = [(0, 1), (0, 1)]
                item['ai'] = 2
                # Auto-save progress: the record is marked as checked.
                item['check'] = True
                item['state'] = '2'
                print("保存数据", item)
                kg.mark_sentence(k, item)
                item['sentence_mark'] = _kg_list_highlight(
                    item['sentence'], item['kg'])
                if (all_labels or item.get('label') == label
                        or item.get('label') == label_value):
                    items.append((k, item))
                    i = i + 1

    del ss
    gc.collect()

    if not items:
        return "没有数据"

    # NOTE(review): if ``DB`` is a PyMongo database, ``Cursor.count()`` is
    # deprecated/removed in newer driver versions in favour of
    # ``Collection.count_documents`` -- confirm the installed version.
    q = {'check': True, "state": '2'}
    checked = DB.kg_mark.find(q).count()
    q = {'check': None, "state": state}
    uncheck = DB.kg_mark.find(q).count()
    return render_template("list.html", **locals())


def _kg_list_highlight(sentence, kg_words):
    """Return ``sentence`` with each kg word wrapped in a ``kg_<index>`` span.

    Uses its own loop index so the caller's running item counter is not
    overwritten.  Replacements are applied in order, one word after another,
    exactly as the inline loop did.
    """
    marked = sentence
    for position, word in enumerate(kg_words):
        marked = marked.replace(
            word,
            "<span class='kg_" + str(position) + "'>" + word + "</span>")
    return marked
def __init__(self):
    """Set up the data directory, the key-value store and the search handle.

    Calls ``tkitFile.File().mkdir("../tdata")``, then stores a
    ``tkitDb.LDB`` opened with ``path="../tdata/lv.db"`` on ``self.tdb`` and
    a ``tkitSearch.Search`` instance on ``self.ss``.
    """
    file_helper = tkitFile.File()
    file_helper.mkdir("../tdata")
    self.tdb = tkitDb.LDB(path="../tdata/lv.db")
    self.ss = tkitSearch.Search()