def temp_save(self, request, pk):
    """Store the rects submitted for a page task without finishing the task.

    A non-admin user is refused when the task has left NOT_READY and is owned
    by someone else, or when the task is still NOT_READY and the user is
    authenticated. Saving is also refused once a PageVerifyTask for the same
    schedule and page rect has a status of HANDLING or later.

    Returns a Response whose ``status`` is 0 (with ``task_id``) on success and
    -1 (with ``msg``) when the save is refused.
    """
    task = PageTask.objects.get(pk=pk)
    user = request.user

    if task.status != TaskStatus.NOT_READY:
        needs_admin = task.owner != user
    else:
        needs_admin = user.is_authenticated
    if needs_admin and not user.is_admin:
        return Response({"status": -1, "msg": "No Permission!"})

    started_verify = PageVerifyTask.objects.filter(
        schedule=task.schedule,
        pagerect=task.pagerect,
        status__gte=TaskStatus.HANDLING,
    ).first()
    if started_verify:
        return Response({"status": -1, "msg": "审定任务已开始,保存已屏蔽!"})

    payload = request.data
    if 'current_x' in payload:
        # Remember the cursor position sent along with the save request.
        task.current_x = payload['current_x']
        task.current_y = payload['current_y']
        task.save(update_fields=['current_x', 'current_y'])

    submitted = payload['rects']
    DeletionCheckItem.direct_delete_rects(submitted, task)
    # Entries with op == 3 are not written back (presumably the delete marker
    # handled by direct_delete_rects above -- confirm).
    kept = [item for item in submitted if item['op'] != 3]
    for item in kept:
        item['page_pid'] = task.pagerect.page.pk
        item['line_no'] = 0
        item['char_no'] = 0

    serializer = RectWriterSerializer(data=kept, many=True)
    # is_valid() must run before .data can be read; its result is not checked.
    serializer.is_valid()
    Rect.bulk_insert_or_replace(serializer.data)
    PageRect.reformat_rects(task.pagerect.page.pk)
    return Response({"status": 0, "task_id": pk})
def test_reformat_page(self):
    """Reformatting a page's rects must leave the total Rect count unchanged."""
    page_pid = 'YB000860_001_02_0'
    count_before = Rect.objects.count()
    PageRect.reformat_rects(page_pid)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Rect.objects.count(), count_before)
def task_done(self, rects, task):
    """Write the submitted rects for ``task`` and mark the task as done.

    ``rects`` is an iterable of subscriptable rect records carrying an ``op``
    key; each kept record is mutated in place (``page_pid``, ``line_no`` and
    ``char_no`` are overwritten) before being serialized and stored.
    """
    # Deleted boxes are handled directly, then filtered out of the batch.
    DeletionCheckItem.direct_delete_rects(rects, task)
    kept = [item for item in rects if item['op'] != 3]
    for item in kept:
        item['page_pid'] = task.pagerect.page.pk
        item['line_no'] = 0
        item['char_no'] = 0

    serializer = RectWriterSerializer(data=kept, many=True)
    # is_valid() must run before .data can be read; its result is not checked.
    serializer.is_valid()
    Rect.bulk_insert_or_replace(serializer.data)
    PageRect.reformat_rects(task.pagerect.page.pk)
    task.done()
def parsePage(self, data):
    """Parse one page record into rect dicts and save them with a PageRect.

    ``data`` is matched against ``self.linePattern``, which must expose the
    named groups ``pageImg``, ``rectData`` and ``txtData``. The rect and text
    data are each split into columns with ``self.separate``; every match of
    ``self.rectPattern`` inside a rect column becomes one rect dict, paired
    with the character at the same position of the matching text column.

    Returns the list of rect dicts, or ``[]`` when ``data`` does not match
    ``self.linePattern``.

    Raises IndexError when the text data has fewer columns, or a text column
    has fewer characters, than the rect data.
    """
    lineMatcher = self.linePattern.match(data)
    if not lineMatcher:
        return []

    pageCode = self.parsePageCode(lineMatcher.group('pageImg'))
    rectColumnArr = self.separate.split(lineMatcher.group('rectData'))
    txtColumnArr = self.separate.split(lineMatcher.group('txtData'))

    # The rect data decides how many columns the page has.
    columnCount = len(rectColumnArr)
    maxLineCount = 0
    pageRectSetList = []
    pageRectModelList = []

    # Column numbers start at 1, following human convention.
    for columnNum, rectColumn in enumerate(rectColumnArr, start=1):
        txtColumn = txtColumnArr[columnNum - 1]
        lineNum = 0
        for rect in self.rectPattern.finditer(rectColumn):
            rectDict = rect.groupdict()
            word = txtColumn[lineNum]
            if word:
                rectDict['ch'] = word
            # Line numbers start at 1 as well.
            lineNum += 1
            maxLineCount = max(lineNum, maxLineCount)
            rectDict['char_no'] = lineNum
            # line_no counts down from columnCount as columns are read.
            rectDict['line_no'] = columnCount - columnNum + 1
            # The captured w/h have x/y subtracted to become extents;
            # horizontal values are halved. w must be computed before x is
            # overwritten.
            rectDict['w'] = (int(rectDict['w']) - int(rectDict['x'])) / 2
            # groupdict() yields strings, so x needs int() before dividing
            # (the original divided the raw string and raised TypeError).
            rectDict['x'] = int(rectDict['x']) / 2
            rectDict['h'] = int(rectDict['h']) - int(rectDict['y'])
            # pageCode lets a rect locate its character and column images.
            rectDict['pcode'] = pageCode
            pageRectSetList.append(rectDict)
            model = Rect.generate(rectDict)
            if model:
                pageRectModelList.append(model)

    pageRect = PageRect()
    pageRect.batch = self.batch
    pageRect.code = pageCode
    pageRect.column_count = columnCount
    pageRect.line_count = maxLineCount
    pageRect.rect_set = json.dumps(pageRectSetList, ensure_ascii=False)
    # Save the Rect models of every column in one batch.
    self.saveRectSet(pageRectModelList, pageRect)
    return pageRectSetList
def task_redo(self, request, pk):
    """Store the rects submitted for a verify task and send it back for redo.

    Only the task's owner may call this; anyone else gets a ``status`` -1
    response. On success the response carries ``status`` 0 and ``task_id``.
    """
    task = PageVerifyTask.objects.get(pk=pk)
    if task.owner != request.user:
        return Response({"status": -1, "msg": "No Permission!"})

    payload = request.data
    if 'current_x' in payload:
        # Remember the cursor position sent along with the request.
        task.current_x = payload['current_x']
        task.current_y = payload['current_y']
        task.save(update_fields=['current_x', 'current_y'])

    submitted = payload['rects']
    DeletionCheckItem.direct_delete_rects(submitted, task)
    # Entries with op == 3 are not written back (presumably the delete marker
    # handled by direct_delete_rects above -- confirm).
    kept = [item for item in submitted if item['op'] != 3]
    for item in kept:
        item['page_pid'] = task.pagerect.page.pk
        item['line_no'] = 0
        item['char_no'] = 0

    serializer = RectWriterSerializer(data=kept, many=True)
    # is_valid() must run before .data can be read; its result is not checked.
    serializer.is_valid()
    Rect.bulk_insert_or_replace(serializer.data)
    PageRect.reformat_rects(task.pagerect.page.pk)
    task.redo()
    return Response({"status": 0, "task_id": pk})
def prepare_gl(cls):
    """Create the "GL" fixture rows.

    Saves a Tripitaka, a Sutra and a Reel (each re-read from the database
    after saving), bulk-creates the Rect rows from ``gl_rect_datas``, then
    saves one Page with 23 column boxes and its PageRect.
    """
    # NOTE(review): "永乐北藏" is also the name given to code "YB" in
    # prepare_yb -- confirm it is the intended name for "GL".
    tripitaka = Tripitaka(code="GL", name="永乐北藏")
    tripitaka.save()
    tripitaka = Tripitaka.objects.get(pk=tripitaka.code)

    sutra = Sutra(tripitaka=tripitaka, code='79', variant_code='0', name="经名")
    sutra.save()
    sutra = Sutra.objects.get(pk=sutra.sutra_sn)

    reel = Reel(sutra=sutra, reel_no=58)
    reel.save()
    reel = Reel.objects.get(pk=reel.reel_sn)

    Rect.objects.bulk_create([Rect(**fields) for fields in gl_rect_datas])

    # Column boxes of the page, one entry per column.
    columns = [
        {"col_id": "GL_79_1_c1001", "x": 1131, "y": 0, "x1": 1200, "y1": 780},
        {"col_id": "GL_79_1_c1002", "x": 1088, "y": 0, "x1": 1131, "y1": 780},
        {"col_id": "GL_79_1_c1003", "x": 1035, "y": 0, "x1": 1088, "y1": 780},
        {"col_id": "GL_79_1_c1004", "x": 984, "y": 0, "x1": 1035, "y1": 780},
        {"col_id": "GL_79_1_c1005", "x": 940, "y": 0, "x1": 984, "y1": 780},
        {"col_id": "GL_79_1_c1006", "x": 891, "y": 0, "x1": 940, "y1": 780},
        {"col_id": "GL_79_1_c1007", "x": 841, "y": 0, "x1": 891, "y1": 780},
        {"col_id": "GL_79_1_c1008", "x": 790, "y": 0, "x1": 841, "y1": 780},
        {"col_id": "GL_79_1_c1009", "x": 744, "y": 0, "x1": 790, "y1": 780},
        {"col_id": "GL_79_1_c1010", "x": 696, "y": 0, "x1": 744, "y1": 780},
        {"col_id": "GL_79_1_c1011", "x": 646, "y": 0, "x1": 696, "y1": 780},
        {"col_id": "GL_79_1_c1012", "x": 598, "y": 0, "x1": 646, "y1": 780},
        {"col_id": "GL_79_1_c1013", "x": 550, "y": 0, "x1": 598, "y1": 780},
        {"col_id": "GL_79_1_c1014", "x": 501, "y": 0, "x1": 550, "y1": 780},
        {"col_id": "GL_79_1_c1015", "x": 452, "y": 0, "x1": 501, "y1": 780},
        {"col_id": "GL_79_1_c1016", "x": 406, "y": 0, "x1": 452, "y1": 780},
        {"col_id": "GL_79_1_c1017", "x": 358, "y": 0, "x1": 406, "y1": 780},
        {"col_id": "GL_79_1_c1018", "x": 310, "y": 0, "x1": 358, "y1": 780},
        {"col_id": "GL_79_1_c1019", "x": 260, "y": 0, "x1": 310, "y1": 780},
        {"col_id": "GL_79_1_c1020", "x": 212, "y": 0, "x1": 260, "y1": 780},
        {"col_id": "GL_79_1_c1021", "x": 164, "y": 0, "x1": 212, "y1": 780},
        {"col_id": "GL_79_1_c1022", "x": 116, "y": 0, "x1": 164, "y1": 780},
        {"col_id": "GL_79_1_c1023", "x": 0, "y": 0, "x1": 116, "y1": 780},
    ]

    page = Page(reel=reel, reel_no=58, reel_page_no=22, json=columns)
    page.save()

    page_rect = PageRect(
        page_id=page.page_sn,
        reel_id=reel.reel_sn,
        line_count=0,
        column_count=0,
        rect_set=gl_pagerect_data["char_data"],
    )
    page_rect.save()
def prepare_yb(cls):
    """Create the "YB" fixture rows.

    Saves a Tripitaka, a Sutra and a Reel (each re-read from the database
    after saving), bulk-creates the Rect rows from ``rect_datas``, then saves
    one Page with 23 column boxes and its PageRect.
    """
    tripitaka = Tripitaka(code="YB", name="永乐北藏")
    tripitaka.save()
    tripitaka = Tripitaka.objects.get(pk=tripitaka.code)

    sutra = Sutra(tripitaka=tripitaka, code='79', variant_code='0', name="经名")
    sutra.save()
    sutra = Sutra.objects.get(pk=sutra.sutra_sn)

    reel = Reel(sutra=sutra, reel_no=58)
    reel.save()
    reel = Reel.objects.get(pk=reel.reel_sn)

    Rect.objects.bulk_create([Rect(**fields) for fields in rect_datas])

    # Column boxes of the page: the first 12 span y 0-811, the remaining 11
    # span y 811-1625.
    columns = [
        {"col_id": "YB_27_c1001", "x": 792, "y": 0, "x1": 1200, "y1": 811},
        {"col_id": "YB_27_c1002", "x": 741, "y": 0, "x1": 792, "y1": 811},
        {"col_id": "YB_27_c1003", "x": 683, "y": 0, "x1": 741, "y1": 811},
        {"col_id": "YB_27_c1004", "x": 624, "y": 0, "x1": 683, "y1": 811},
        {"col_id": "YB_27_c1005", "x": 567, "y": 0, "x1": 624, "y1": 811},
        {"col_id": "YB_27_c1006", "x": 495, "y": 0, "x1": 567, "y1": 811},
        {"col_id": "YB_27_c1007", "x": 435, "y": 0, "x1": 495, "y1": 811},
        {"col_id": "YB_27_c1008", "x": 378, "y": 0, "x1": 435, "y1": 811},
        {"col_id": "YB_27_c1009", "x": 319, "y": 0, "x1": 378, "y1": 811},
        {"col_id": "YB_27_c1010", "x": 264, "y": 0, "x1": 319, "y1": 811},
        {"col_id": "YB_27_c1011", "x": 185, "y": 0, "x1": 264, "y1": 811},
        {"col_id": "YB_27_c1012", "x": 0, "y": 0, "x1": 185, "y1": 811},
        {"col_id": "YB_27_c1013", "x": 795, "y": 811, "x1": 1200, "y1": 1625},
        {"col_id": "YB_27_c1014", "x": 744, "y": 811, "x1": 795, "y1": 1625},
        {"col_id": "YB_27_c1015", "x": 684, "y": 811, "x1": 744, "y1": 1625},
        {"col_id": "YB_27_c1016", "x": 630, "y": 811, "x1": 684, "y1": 1625},
        {"col_id": "YB_27_c1017", "x": 573, "y": 811, "x1": 630, "y1": 1625},
        {"col_id": "YB_27_c1018", "x": 500, "y": 811, "x1": 573, "y1": 1625},
        {"col_id": "YB_27_c1019", "x": 444, "y": 811, "x1": 500, "y1": 1625},
        {"col_id": "YB_27_c1020", "x": 380, "y": 811, "x1": 444, "y1": 1625},
        {"col_id": "YB_27_c1021", "x": 328, "y": 811, "x1": 380, "y1": 1625},
        {"col_id": "YB_27_c1022", "x": 271, "y": 811, "x1": 328, "y1": 1625},
        {"col_id": "YB_27_c1023", "x": 0, "y": 811, "x1": 271, "y1": 1625},
    ]

    page = Page(reel=reel, vol_no=58, page_no=22, json=columns)
    page.save()

    page_rect = PageRect(
        page_id=page.page_sn,
        reel_id=reel.reel_sn,
        line_count=0,
        column_count=0,
        rect_set=pagerect_data["char_data"],
    )
    page_rect.save()
def setUpTestData(cls):
    """Create the "GZ" fixture rows shared by the tests of this class.

    Saves a Tripitaka, a Sutra and a Reel (each re-read from the database
    after saving), bulk-creates the Rect rows from ``rect_datas``, then saves
    one Page with 23 column boxes and a PageRect attached to the first Page
    returned by the database.
    """
    tripitaka = Tripitaka(code="GZ", name="高丽藏")
    tripitaka.save()
    tripitaka = Tripitaka.objects.get(pk=tripitaka.code)

    sutra = Sutra(tripitaka=tripitaka, code='79', variant_code='0', name="经名")
    sutra.save()
    sutra = Sutra.objects.get(pk=sutra.sutra_sn)

    reel = Reel(sutra=sutra, reel_no='58')
    reel.save()
    reel = Reel.objects.get(pk=reel.reel_sn)

    Rect.objects.bulk_create([Rect(**fields) for fields in rect_datas])

    # Column boxes of the page, one entry per column.
    columns = [
        {'x1': 1200, 'col_id': 'GZ000790v001p0001001', 'y': 0, 'y1': 780, 'x': 1131},
        {'x1': 1131, 'col_id': 'GZ000790v001p0001002', 'y': 0, 'y1': 780, 'x': 1088},
        {'x1': 1088, 'col_id': 'GZ000790v001p0001003', 'y': 0, 'y1': 780, 'x': 1035},
        {'x1': 1035, 'col_id': 'GZ000790v001p0001004', 'y': 0, 'y1': 780, 'x': 984},
        {'x1': 984, 'col_id': 'GZ000790v001p0001005', 'y': 0, 'y1': 780, 'x': 940},
        {'x1': 940, 'col_id': 'GZ000790v001p0001006', 'y': 0, 'y1': 780, 'x': 891},
        {'x1': 891, 'col_id': 'GZ000790v001p0001007', 'y': 0, 'y1': 780, 'x': 841},
        {'x1': 841, 'col_id': 'GZ000790v001p0001008', 'y': 0, 'y1': 780, 'x': 790},
        {'x1': 790, 'col_id': 'GZ000790v001p0001009', 'y': 0, 'y1': 780, 'x': 744},
        {'x1': 744, 'col_id': 'GZ000790v001p0001010', 'y': 0, 'y1': 780, 'x': 696},
        {'x1': 696, 'col_id': 'GZ000790v001p0001011', 'y': 0, 'y1': 780, 'x': 646},
        {'x1': 646, 'col_id': 'GZ000790v001p0001012', 'y': 0, 'y1': 780, 'x': 598},
        {'x1': 598, 'col_id': 'GZ000790v001p0001013', 'y': 0, 'y1': 780, 'x': 550},
        {'x1': 550, 'col_id': 'GZ000790v001p0001014', 'y': 0, 'y1': 780, 'x': 501},
        {'x1': 501, 'col_id': 'GZ000790v001p0001015', 'y': 0, 'y1': 780, 'x': 452},
        {'x1': 452, 'col_id': 'GZ000790v001p0001016', 'y': 0, 'y1': 780, 'x': 406},
        {'x1': 406, 'col_id': 'GZ000790v001p0001017', 'y': 0, 'y1': 780, 'x': 358},
        {'x1': 358, 'col_id': 'GZ000790v001p0001018', 'y': 0, 'y1': 780, 'x': 310},
        {'x1': 310, 'col_id': 'GZ000790v001p0001019', 'y': 0, 'y1': 780, 'x': 260},
        {'x1': 260, 'col_id': 'GZ000790v001p0001020', 'y': 0, 'y1': 780, 'x': 212},
        {'x1': 212, 'col_id': 'GZ000790v001p0001021', 'y': 0, 'y1': 780, 'x': 164},
        {'x1': 164, 'col_id': 'GZ000790v001p0001022', 'y': 0, 'y1': 780, 'x': 116},
        {'x1': 116, 'col_id': 'GZ000790v001p0001023', 'y': 0, 'y1': 780, 'x': 0},
    ]

    page = Page(reel=reel, vol_no="58", page_no=22, json=columns, img_path='some_uri')
    page.save()

    page_rect = PageRect(
        page=Page.objects.first(),
        reel_id=reel.reel_sn,
        line_count=0,
        column_count=0,
        rect_set=pagerect_data["char_data"],
    )
    page_rect.save()
def test_reformat_page(self):
    """Reformatting a page's rects must leave the total Rect count unchanged."""
    count_before = Rect.objects.count()
    PageRect.reformat_rects('YB000790v058p000220')
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(Rect.objects.count(), count_before)