def marked_by_accuracy(request):
    """Bulk-mark rows of one character according to an accuracy range.

    Reads ``char``, ``min_value`` and ``max_value`` from ``request.POST``.

    * ``max_value <= 0`` or ``min_value == max_value``: delegates to
      ``_mark_based_scope(min_value, char)``.
    * ``min_value > max_value``: nothing is changed.
    * otherwise: rows of ``char`` with ``is_correct=0`` and accuracy within
      ``[min_value, max_value]`` get ``is_correct=1`` when
      ``max_value > 500`` and ``is_correct=-1`` otherwise.

    Returns a ``JsonResponse`` with ``status`` ``'ok'``, or ``'error'`` when
    ``char`` is missing or a bound is missing / not an integer.
    """
    char = request.POST.get('char', None)
    if char is None:
        return JsonResponse({'status': 'error', 'msg': 'no char'})

    try:
        l_value = int(request.POST.get('min_value'))
        r_value = int(request.POST.get('max_value'))
    except (TypeError, ValueError):
        # int(None) raises TypeError, int('abc') raises ValueError; report
        # both as a JSON error instead of letting them escape the view.
        return JsonResponse({'status': 'error',
                             'msg': 'invalid min_value/max_value'})

    if r_value <= 0 or l_value == r_value:
        # The helper refreshes the statistics of ``char`` on its own.
        _mark_based_scope(l_value, char)
    elif l_value < r_value:
        mark = 1 if r_value > 500 else -1
        Character.objects.filter(
            char=char, is_correct=0,
            accuracy__gte=l_value, accuracy__lte=r_value,
        ).update(is_correct=mark)
        # NOTE(review): statistics are refreshed only on the path where this
        # view itself changed rows -- confirm no caller relies on a refresh
        # for an inverted (min > max) range.
        Character.update_statistics(char)
    return JsonResponse({'status': 'ok'})
def run_seg(request, pk):
    """Segment a page into characters, save them and return them as JSON.

    Looks up the ``Page`` (404 when it does not exist), hands its image path
    and the ``fields`` dict of every ``Region`` of the page to ``charseg``,
    saves one ``Character`` per returned entry, passes the page to the
    ``cut_char`` offline task and answers with the raw ``charseg`` result.
    """
    page = get_object_or_404(Page, pk=pk)
    img_path = page.get_image_path()
    serialized_regions = serializers.serialize(
        "python", Region.objects.filter(page_id=pk))
    region_lst = []
    for entry in serialized_regions:
        region_lst.append(entry['fields'])
    char_lst = charseg(img_path, region_lst, pk)

    # Keys copied verbatim from each charseg entry onto the Character.
    copied_keys = ('char', 'left', 'right', 'top', 'bottom',
                   'line_no', 'region_no', 'char_no')
    for ch in char_lst:
        char_id = '{0}L{1:02}R{2:02}C{3:03}'.format(
            pk, ch['line_no'], ch['region_no'], ch['char_no'])
        fields = dict((key, ch[key]) for key in copied_keys)
        Character(
            id=char_id,
            page_id=pk,
            is_correct=-9,  # the character image has not been cut yet
            **fields
        ).save()

    cut_char.delay(pk)  # offline task: cut the character images
    return JsonResponse(char_lst, safe=False)
def filter_mark_by_recog(self, request):
    """Apply a checked / unchecked split to ``Character`` rows.

    ``request.data`` must provide ``checked_ids``, ``unchecked_ids``,
    ``type`` and ``char``.  Depending on ``type``:

    * 0 -- checked: ``is_correct = 1``; unchecked: ``is_same = -2``
    * 1 -- checked: ``is_correct = -1``; unchecked: ``is_same = 2``
    * 2 -- checked: ``is_correct = 1``; unchecked: ``is_correct = -1`` and
      ``is_same = 2``

    Any other ``type`` leaves the rows untouched.  The statistics of
    ``char`` are refreshed afterwards and ``{"status": "ok"}`` is returned.
    """
    def stamp(rows, assignments):
        # Set every (field, value) pair on each row, then save it.
        for row in rows:
            for field, value in assignments:
                setattr(row, field, value)
            row.save()

    payload = request.data
    checked_chars = Character.objects.filter(id__in=payload["checked_ids"])
    unchecked_chars = Character.objects.filter(id__in=payload["unchecked_ids"])
    result_type = payload["type"]

    if result_type == 0:
        stamp(checked_chars, [("is_correct", 1)])
        stamp(unchecked_chars, [("is_same", -2)])
    elif result_type == 1:
        stamp(checked_chars, [("is_correct", -1)])
        stamp(unchecked_chars, [("is_same", 2)])
    elif result_type == 2:
        stamp(checked_chars, [("is_correct", 1)])
        stamp(unchecked_chars, [("is_correct", -1), ("is_same", 2)])

    Character.update_statistics(payload["char"])
    return Response({"status": "ok"})
def runSegment(request, page_id):
    """Segment a page in memory and return its characters grouped by line.

    Nothing is saved: unsaved ``Character`` objects are built from the
    ``process_page`` output, consecutive characters sharing a ``line_no``
    are bundled into ``CharacterLine`` objects, and the list is serialized
    with ``MyJsonEncoder`` and returned under the ``line_lst`` key.
    """
    page = Page.objects.get(id=page_id)
    image_name = page.image.url
    text = page.text
    image = io.imread(image_name, 0)
    total_char_lst = process_page(image, text, page_id)

    line_lst = []
    pending = []  # characters of the line currently being collected
    cur_line_no = 0
    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id, page_id=page_id, char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
            line_no=ch.line_no, char_no=ch.char_no, is_correct=False)
        character.width = character.right - character.left
        character.height = character.bottom - character.top

        if character.line_no == cur_line_no:
            pending.append(character)
            continue
        # Line number changed: close the previous line (if any) and start a
        # fresh list so the closed CharacterLine keeps its own members.
        if pending:
            line_lst.append(CharacterLine(
                cur_line_no, pending[0].left, pending[0].right, pending))
        cur_line_no = character.line_no
        pending = [character]

    # Close the last line still being collected.
    if pending:
        line_lst.append(CharacterLine(
            cur_line_no, pending[0].left, pending[0].right, pending))

    json_line_lst = json.dumps(line_lst, cls=MyJsonEncoder)
    return JsonResponse({u'line_lst': json_line_lst}, safe=False)
def filter_mark_by_recog(self, request):
    """Apply a checked / unchecked split to ``Character`` rows.

    ``request.data`` must provide ``checked_ids``, ``unchecked_ids``,
    ``type`` and ``char``.  Depending on ``type``:

    * 0 -- checked: ``is_correct = 1``; unchecked: ``is_same = -2``
    * 1 -- checked: ``is_correct = -1``; unchecked: ``is_same = 2``
    * 2 -- checked: ``is_correct = 1``; unchecked: ``is_correct = -1`` and
      ``is_same = 2``

    Any other ``type`` leaves the rows untouched.  The statistics of
    ``char`` are refreshed afterwards and ``{'status': 'ok'}`` is returned.
    """
    def stamp(rows, assignments):
        # Set every (field, value) pair on each row, then save it.
        for row in rows:
            for field, value in assignments:
                setattr(row, field, value)
            row.save()

    payload = request.data
    checked_chars = Character.objects.filter(id__in=payload['checked_ids'])
    unchecked_chars = Character.objects.filter(id__in=payload['unchecked_ids'])
    result_type = payload['type']

    if result_type == 0:
        stamp(checked_chars, [('is_correct', 1)])
        stamp(unchecked_chars, [('is_same', -2)])
    elif result_type == 1:
        stamp(checked_chars, [('is_correct', -1)])
        stamp(unchecked_chars, [('is_same', 2)])
    elif result_type == 2:
        stamp(checked_chars, [('is_correct', 1)])
        stamp(unchecked_chars, [('is_correct', -1), ('is_same', 2)])

    Character.update_statistics(payload['char'])
    return Response({'status': 'ok'})
def filter_mark_by_recog(self, request):
    """Apply a checked / unchecked split to ``Character`` rows.

    ``request.data`` must provide ``checked_ids``, ``unchecked_ids``,
    ``type`` and ``char``.  Depending on ``type``:

    * 0 -- checked: ``is_correct = 1``; unchecked: ``is_same = -2``
    * 1 -- checked: ``is_correct = -1``; unchecked: ``is_same = 2``
    * 2 -- checked: ``is_correct = 1``; unchecked: ``is_correct = -1`` and
      ``is_same = 2``

    Any other ``type`` leaves the rows untouched.  The statistics of
    ``char`` are refreshed afterwards and ``{'status': 'ok'}`` is returned.
    """
    def stamp(rows, assignments):
        # Set every (field, value) pair on each row, then save it.
        for row in rows:
            for field, value in assignments:
                setattr(row, field, value)
            row.save()

    payload = request.data
    checked_chars = Character.objects.filter(id__in=payload['checked_ids'])
    unchecked_chars = Character.objects.filter(id__in=payload['unchecked_ids'])
    result_type = payload['type']

    if result_type == 0:
        stamp(checked_chars, [('is_correct', 1)])
        stamp(unchecked_chars, [('is_same', -2)])
    elif result_type == 1:
        stamp(checked_chars, [('is_correct', -1)])
        stamp(unchecked_chars, [('is_same', 2)])
    elif result_type == 2:
        stamp(checked_chars, [('is_correct', 1)])
        stamp(unchecked_chars, [('is_correct', -1), ('is_same', 2)])

    Character.update_statistics(payload['char'])
    return Response({'status': 'ok'})
def segment_one_page(page_id, image_name, text):
    """Segment one page image and save a ``Character`` per detected character.

    Best effort: ordinary failures are reported on stdout and swallowed so a
    batch caller can continue with the next page.

    :param page_id: stored as ``page_id`` on every saved ``Character``.
    :param image_name: path handed to ``io.imread``.
    :param text: page text handed to ``process_page``.
    """
    # NOTE(review): debugging leftover -- ``text1`` is not defined in this
    # function, so it has to exist at module level; confirm or remove.
    if text == text1:
        print('equal----')
    try:
        image = io.imread(image_name, 0)
        for ch in process_page(image, text, page_id):
            char_id = ch.char_id.strip()
            character = Character(
                id=char_id, page_id=page_id, char=ch.char,
                image=char_id + u'.jpg',
                left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
                line_no=ch.line_no, char_no=ch.char_no, is_correct=False)
            print(character)
            character.save()
    except (IOError, OSError):
        print('missing image file')
    except Exception as exc:
        # Still best effort, but report the real cause instead of labelling
        # every failure as a missing file.  KeyboardInterrupt / SystemExit
        # are deliberately not caught.
        print('segmentation failed for page %s: %r' % (page_id, exc))
def _mark_based_scope(l_value, char):
    """Mark rows of ``char`` whose accuracy equals ``l_value`` exactly.

    Rows with ``is_correct=0`` and ``accuracy == l_value`` are set to
    ``is_correct=1`` when ``l_value > 500`` and to ``-1`` otherwise; the
    statistics of ``char`` are refreshed afterwards.
    """
    verdict = 1 if l_value > 500 else -1
    matching = Character.objects.filter(
        char=char, is_correct=0, accuracy=l_value)
    matching.update(is_correct=verdict)
    Character.update_statistics(char)
def set_correct(request):
    """Store correctness marks for characters and log them as mark records.

    The keys present in ``request.POST`` select the action:

    * ``id`` (with ``is_correct`` and ``char``): mark a single character.
    * ``e_charArr[]`` and/or ``c_charArr[]`` (with ``char``): set the listed
      ids to ``is_correct=-1`` and ``is_correct=1`` respectively; also
      increments the session counter ``check_char_number``.
    * ``cl_charArr[]`` (with ``char``): reset the listed ids to
      ``is_correct=0``.

    Every changed id gets a ``CharMarkRecord`` for ``request.user``.
    Returns a ``JsonResponse`` with ``status`` ``'ok'`` (plus
    ``'clear': 'ok'`` for a reset) or ``'error'`` when no known key is sent.
    """
    post = request.POST
    if 'id' in post:
        char_id = post['id']
        is_correct = int(post['is_correct'])
        char = post['char']
        Character.objects.filter(id=char_id).update(is_correct=is_correct)
        Character.update_statistics(char)
        CharMarkRecord.create(
            request.user, char_id, is_correct, timezone.now()).save()
        data = {'status': 'ok'}
    elif 'e_charArr[]' in post or 'c_charArr[]' in post:
        request.session['check_char_number'] = (
            request.session.get('check_char_number', 0) + 1)
        char = post['char']
        marked_at = timezone.now()
        for key, mark in (('e_charArr[]', -1), ('c_charArr[]', 1)):
            char_ids = post.getlist(key)
            if not char_ids:
                continue
            Character.objects.filter(id__in=char_ids).update(is_correct=mark)
            Character.update_statistics(char)
            # Each list gets its own batch of records, so nothing is
            # inserted twice when both lists are present.
            CharMarkRecord.objects.bulk_create([
                CharMarkRecord.create(request.user, char_id, mark, marked_at)
                for char_id in char_ids
            ])
        data = {'status': 'ok'}
    elif 'cl_charArr[]' in post:
        char = post['char']
        char_ids = post.getlist('cl_charArr[]')
        Character.objects.filter(id__in=char_ids).update(is_correct=0)
        Character.update_statistics(char)
        if char_ids:
            cleared_at = timezone.now()
            CharMarkRecord.objects.bulk_create([
                CharMarkRecord.create(request.user, char_id, 0, cleared_at)
                for char_id in char_ids
            ])
        data = {'status': 'ok', 'clear': 'ok'}
    else:
        data = {'status': 'error'}
    return JsonResponse(data)
def set_correct(request):
    """Store correctness marks for characters and log them as mark records.

    The keys present in ``request.POST`` select the action:

    * ``id`` (with ``is_correct`` and ``char``): mark a single character.
    * ``e_charArr[]`` and/or ``c_charArr[]`` (with ``char``): set the listed
      ids to ``is_correct=-1`` and ``is_correct=1`` respectively; also
      increments the session counter ``check_char_number``.
    * ``cl_charArr[]`` (with ``char``): reset the listed ids to
      ``is_correct=0``.

    Every changed id gets a ``CharMarkRecord`` for ``request.user``.
    Returns a ``JsonResponse`` with ``status`` ``'ok'`` (plus
    ``'clear': 'ok'`` for a reset) or ``'error'`` when no known key is sent.
    """
    post = request.POST
    if 'id' in post:
        char_id = post['id']
        is_correct = int(post['is_correct'])
        char = post['char']
        Character.objects.filter(id=char_id).update(is_correct=is_correct)
        Character.update_statistics(char)
        CharMarkRecord.create(
            request.user, char_id, is_correct, timezone.now()).save()
        data = {'status': 'ok'}
    elif 'e_charArr[]' in post or 'c_charArr[]' in post:
        request.session['check_char_number'] = (
            request.session.get('check_char_number', 0) + 1)
        char = post['char']
        marked_at = timezone.now()
        for key, mark in (('e_charArr[]', -1), ('c_charArr[]', 1)):
            char_ids = post.getlist(key)
            if not char_ids:
                continue
            Character.objects.filter(id__in=char_ids).update(is_correct=mark)
            Character.update_statistics(char)
            # Each list gets its own batch of records, so nothing is
            # inserted twice when both lists are present.
            CharMarkRecord.objects.bulk_create([
                CharMarkRecord.create(request.user, char_id, mark, marked_at)
                for char_id in char_ids
            ])
        data = {'status': 'ok'}
    elif 'cl_charArr[]' in post:
        char = post['char']
        char_ids = post.getlist('cl_charArr[]')
        Character.objects.filter(id__in=char_ids).update(is_correct=0)
        Character.update_statistics(char)
        if char_ids:
            cleared_at = timezone.now()
            CharMarkRecord.objects.bulk_create([
                CharMarkRecord.create(request.user, char_id, 0, cleared_at)
                for char_id in char_ids
            ])
        data = {'status': 'ok', 'clear': 'ok'}
    else:
        data = {'status': 'error'}
    return JsonResponse(data)
def marked_by_accuracy(request):
    """Bulk-mark rows of one character according to an accuracy range.

    Reads ``char``, ``min_value`` and ``max_value`` from ``request.POST``.

    * ``max_value <= 0`` or ``min_value == max_value``: delegates to
      ``_mark_based_scope(min_value, char)``.
    * ``min_value > max_value``: nothing is changed.
    * otherwise: rows of ``char`` with ``is_correct=0`` and accuracy within
      ``[min_value, max_value]`` get ``is_correct=1`` when
      ``max_value > 500`` and ``is_correct=-1`` otherwise.

    Returns a ``JsonResponse`` with ``status`` ``'ok'``, or ``'error'`` when
    ``char`` is missing or a bound is missing / not an integer.
    """
    char = request.POST.get('char', None)
    if char is None:
        return JsonResponse({'status': 'error', 'msg': 'no char'})

    try:
        l_value = int(request.POST.get('min_value'))
        r_value = int(request.POST.get('max_value'))
    except (TypeError, ValueError):
        # int(None) raises TypeError, int('abc') raises ValueError; report
        # both as a JSON error instead of letting them escape the view.
        return JsonResponse({'status': 'error',
                             'msg': 'invalid min_value/max_value'})

    if r_value <= 0 or l_value == r_value:
        # The helper refreshes the statistics of ``char`` on its own.
        _mark_based_scope(l_value, char)
    elif l_value < r_value:
        mark = 1 if r_value > 500 else -1
        Character.objects.filter(
            char=char, is_correct=0,
            accuracy__gte=l_value, accuracy__lte=r_value,
        ).update(is_correct=mark)
        # NOTE(review): statistics are refreshed only on the path where this
        # view itself changed rows -- confirm no caller relies on a refresh
        # for an inverted (min > max) range.
        Character.update_statistics(char)
    return JsonResponse({'status': 'ok'})
def test_file():
    """Segment one hard-coded sample page and save its characters.

    Lists the ``.txt`` files of volume ``01`` / page ``001`` through a shell
    command, derives the page id from the text before the first ``.txt``
    (dropping its last character and appending ``a``), loads the matching
    image and text, runs ``process_page`` and saves one ``Character`` per
    result.  Exits with status -1 when the listing contains no ``.txt``.
    """
    vol_no = u'01'
    page_no = u'001'
    listing = subprocess.check_output(
        'cd /media/DATA/work/image-preprocessing/binary_image/%s/%s/; ls *.txt 2>/dev/null'
        % (vol_no, page_no),
        shell=True)
    pos = listing.find('.txt')
    if pos == -1:
        sys.exit(-1)
    page_id = listing[:pos - 1] + 'a'

    location = (vol_no, page_no, page_id)
    image_name = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/%s.jpg' % location
    text_name = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/%s.txt' % location
    image = io.imread(image_name, 0)
    with open(text_name, 'r') as f:
        text = f.read().decode('utf-8')

    # NOTE(review): process_page runs twice and the first result is
    # discarded -- confirm whether the extra call is needed.
    process_page(image, text, page_id)
    total_char_lst = process_page(image, text, page_id)

    # Django is configured only at this point, before the model import.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id, page_id=page_id, char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
            line_no=ch.line_no, char_no=ch.char_no, is_correct=False)
        print(character)
        character.save()
def runSegment(request, page_id):
    """Segment a page in memory and return its characters grouped by line.

    Nothing is saved: unsaved ``Character`` objects are built from the
    ``process_page`` output, consecutive characters sharing a ``line_no``
    are bundled into ``CharacterLine`` objects, and the list is serialized
    with ``MyJsonEncoder`` and returned under the ``line_lst`` key.
    """
    page = Page.objects.get(id=page_id)
    image_name = page.image.url
    text = page.text
    image = io.imread(image_name, 0)
    total_char_lst = process_page(image, text, page_id)

    line_lst = []
    pending = []  # characters of the line currently being collected
    cur_line_no = 0
    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id, page_id=page_id, char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
            line_no=ch.line_no, char_no=ch.char_no, is_correct=False)
        character.width = character.right - character.left
        character.height = character.bottom - character.top

        if character.line_no == cur_line_no:
            pending.append(character)
            continue
        # Line number changed: close the previous line (if any) and start a
        # fresh list so the closed CharacterLine keeps its own members.
        if pending:
            line_lst.append(CharacterLine(
                cur_line_no, pending[0].left, pending[0].right, pending))
        cur_line_no = character.line_no
        pending = [character]

    # Close the last line still being collected.
    if pending:
        line_lst.append(CharacterLine(
            cur_line_no, pending[0].left, pending[0].right, pending))

    json_line_lst = json.dumps(line_lst, cls=MyJsonEncoder)
    return JsonResponse({u'line_lst': json_line_lst}, safe=False)
def test_file():
    """Segment one hard-coded sample page and save its characters.

    Lists the ``.txt`` files of volume ``01`` / page ``001`` through a shell
    command, derives the page id from the text before the first ``.txt``
    (dropping its last character and appending ``a``), loads the matching
    image and text, runs ``process_page`` and saves one ``Character`` per
    result.  Exits with status -1 when the listing contains no ``.txt``.
    """
    vol_no = u'01'
    page_no = u'001'
    listing = subprocess.check_output(
        'cd /media/DATA/work/image-preprocessing/binary_image/%s/%s/; ls *.txt 2>/dev/null'
        % (vol_no, page_no),
        shell=True)
    pos = listing.find('.txt')
    if pos == -1:
        sys.exit(-1)
    page_id = listing[:pos - 1] + 'a'

    location = (vol_no, page_no, page_id)
    image_name = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/%s.jpg' % location
    text_name = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s/%s.txt' % location
    image = io.imread(image_name, 0)
    with open(text_name, 'r') as f:
        text = f.read().decode('utf-8')

    # NOTE(review): process_page runs twice and the first result is
    # discarded -- confirm whether the extra call is needed.
    process_page(image, text, page_id)
    total_char_lst = process_page(image, text, page_id)

    # Django is configured only at this point, before the model import.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(
            id=char_id, page_id=page_id, char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
            line_no=ch.line_no, char_no=ch.char_no, is_correct=False)
        print(character)
        character.save()
def run_seg(request, pk):
    """Segment a page into characters, save them and return them as JSON.

    Looks up the ``Page`` (404 when it does not exist), hands its image path
    and the ``fields`` dict of every ``Region`` of the page to ``charseg``,
    saves one ``Character`` per returned entry, passes the page to the
    ``cut_char`` offline task and answers with the raw ``charseg`` result.
    """
    page = get_object_or_404(Page, pk=pk)
    img_path = page.get_image_path()
    serialized_regions = serializers.serialize(
        "python", Region.objects.filter(page_id=pk))
    region_lst = []
    for entry in serialized_regions:
        region_lst.append(entry['fields'])
    char_lst = charseg(img_path, region_lst, pk)

    # Keys copied verbatim from each charseg entry onto the Character.
    copied_keys = ('char', 'left', 'right', 'top', 'bottom',
                   'line_no', 'region_no', 'char_no')
    for ch in char_lst:
        char_id = '{0}L{1:02}R{2:02}C{3:03}'.format(
            pk, ch['line_no'], ch['region_no'], ch['char_no'])
        fields = dict((key, ch[key]) for key in copied_keys)
        Character(
            id=char_id,
            page_id=pk,
            is_correct=-9,  # the character image has not been cut yet
            **fields
        ).save()

    cut_char.delay(pk)  # offline task: cut the character images
    return JsonResponse(char_lst, safe=False)
def segment_one_page(page_id, image_name, text):
    """Re-segment one page: cut character images and replace its rows.

    Reads the page image from the shared ``page_images`` directory, runs
    ``process_page``, asks every result to cut its image into the shared
    ``character_images`` directory, then replaces all ``Character`` rows of
    the page with the newly built ones.

    :param page_id: page whose ``Character`` rows are replaced.
    :param image_name: file name below the ``page_images`` directory.
    :param text: page text handed to ``process_page``.
    """
    from django.db import transaction

    image_path = u'/home/share/dzj_characters/page_images/%s' % image_name
    image = io.imread(image_path, 0)

    character_lst = []
    for ch in process_page(image, text, page_id):
        char_id = ch.char_id.strip()
        path = u'/home/share/dzj_characters/character_images/%s.jpg' % char_id
        ch.cut_char_from_page(image, path)
        character_lst.append(Character(
            id=char_id, page_id=page_id, char=ch.char,
            image=char_id + u'.jpg',
            left=ch.left, right=ch.right, top=ch.top, bottom=ch.bottom,
            line_no=ch.line_no, char_no=ch.char_no, is_correct=0))

    # Delete and insert in one transaction so that a failed insert cannot
    # leave the page without any Character rows.
    with transaction.atomic():
        Character.objects.filter(page_id=page_id).delete()
        Character.objects.bulk_create(character_lst)
def recog_all():
    """Run ``Character.recog_characters`` for the top 500 characters.

    The characters are the first 500 ``char`` values of
    ``CharacterStatistics`` ordered by descending ``total_cnt``; each one is
    printed before it is processed.
    """
    ranked = CharacterStatistics.objects.order_by('-total_cnt')
    top_chars = ranked.values_list('char', flat=True)[:500]
    for ch in top_chars:
        print(ch)
        Character.recog_characters(ch)
def recog_all():
    """Run ``Character.recog_characters`` for the top 500 characters.

    The characters are the first 500 ``char`` values of
    ``CharacterStatistics`` ordered by descending ``total_cnt``; each one is
    printed before it is processed.
    """
    ranked = CharacterStatistics.objects.order_by('-total_cnt')
    top_chars = ranked.values_list('char', flat=True)[:500]
    for ch in top_chars:
        print(ch)
        Character.recog_characters(ch)
def _mark_based_scope(l_value, char):
    """Mark rows of ``char`` whose accuracy equals ``l_value`` exactly.

    Rows with ``is_correct=0`` and ``accuracy == l_value`` are set to
    ``is_correct=1`` when ``l_value > 500`` and to ``-1`` otherwise; the
    statistics of ``char`` are refreshed afterwards.
    """
    verdict = 1 if l_value > 500 else -1
    matching = Character.objects.filter(
        char=char, is_correct=0, accuracy=l_value)
    matching.update(is_correct=verdict)
    Character.update_statistics(char)