def run_seg(request, pk):
    """Segment the page ``pk`` into characters, store them, and return them as JSON.

    The regions recorded for the page are serialized and handed to
    ``charseg`` together with the page's image path.  Every entry returned
    by ``charseg`` is saved as a ``Character`` row, the offline image-cutting
    task is queued, and the raw segmentation result is sent back.

    NOTE(review): a second ``run_seg`` is defined later in this file and
    replaces this one when the module is imported.

    Args:
        request: the incoming request (not read by this function).
        pk: primary key of the ``Page`` to segment; a missing page goes
            through ``get_object_or_404``.

    Returns:
        A ``JsonResponse`` wrapping the list produced by ``charseg``.
    """
    target_page = get_object_or_404(Page, pk=pk)
    image_path = target_page.get_image_path()
    serialized_regions = serializers.serialize(
        "python", Region.objects.filter(page_id=pk))
    regions = [entry['fields'] for entry in serialized_regions]
    segmented = charseg(image_path, regions, pk)

    # Values copied verbatim from each segmentation entry onto the model.
    copied_keys = ('char', 'left', 'right', 'top', 'bottom',
                   'line_no', 'region_no', 'char_no')
    for seg in segmented:
        # The id encodes page, line, region and character position.
        new_id = '{0}L{1:02}R{2:02}C{3:03}'.format(
            pk, seg['line_no'], seg['region_no'], seg['char_no'])
        attrs = {key: seg[key] for key in copied_keys}
        # is_correct=-9: the character image has not been cut yet.
        Character(id=new_id, page_id=pk, is_correct=-9, **attrs).save()

    # Queue the offline task that cuts the character images.
    cut_char.delay(pk)
    return JsonResponse(segmented, safe=False)
def segment_one_page(page_id, image_name, text):
    """Segment one page image into characters and save a Character for each.

    The image is read with ``io.imread``, passed with ``text`` to
    ``process_page``, and every returned character is printed and saved.

    This stays a best-effort routine: failures are reported on stdout and
    never raised.  Only I/O errors are reported as a missing image; any
    other error is reported with its real cause.  SystemExit and
    KeyboardInterrupt are no longer swallowed.

    Args:
        page_id: stored on every Character and forwarded to ``process_page``.
        image_name: path of the page image.
        text: page text forwarded to ``process_page``.

    Returns:
        None.
    """
    # Debug aid kept from the original.  ``text1`` is not defined in this
    # function, so it has to exist at module level -- TODO confirm.
    if text == text1:
        print('equal----')
    try:
        image = io.imread(image_name, 0)
        for ch in process_page(image, text, page_id):
            char_id = ch.char_id.strip()
            character = Character(id=char_id,
                                  page_id=page_id,
                                  char=ch.char,
                                  image=char_id + u'.jpg',
                                  left=ch.left,
                                  right=ch.right,
                                  top=ch.top,
                                  bottom=ch.bottom,
                                  line_no=ch.line_no,
                                  char_no=ch.char_no,
                                  is_correct=False)
            print(character)
            # Saved one by one; rows written before a failure are kept.
            character.save()
    except (IOError, OSError):
        print('missing image file')
    except Exception as exc:
        # Keep going like the original did, but say what really went wrong.
        print('segmentation failed for page %r: %r' % (page_id, exc))
def test_file():
    """Ad-hoc driver: segment one hard-coded page and save its characters.

    Looks for ``*.txt`` files in the directory of volume ``01`` / page
    ``001``, derives the page id from the first one (its stem minus the last
    character, plus ``'a'``), runs ``process_page`` on the matching image and
    text, and saves one ``Character`` per result.

    The directory is listed with ``glob`` rather than a shell ``cd; ls``
    pipeline, so a missing directory or a directory without ``.txt`` files
    reaches ``sys.exit(-1)`` instead of raising from the subprocess call or
    listing the current working directory.

    NOTE(review): another ``test_file`` is defined later in this file and
    replaces this one when the module is imported.

    Returns:
        None.  Exits the process with status -1 when no ``.txt`` file exists.
    """
    import glob
    import os

    vol_no = u'01'
    page_no = u'001'
    page_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s' % (
        vol_no, page_no)

    # Sorted to mirror the alphabetical order ``ls`` produced.
    txt_names = sorted(
        os.path.basename(path)
        for path in glob.glob(os.path.join(page_dir, u'*.txt')))
    if not txt_names:
        sys.exit(-1)

    # Strip '.txt' and the character before it, then append 'a'.
    page_id = txt_names[0][:-len(u'.txt') - 1] + u'a'
    image_name = u'%s/%s.jpg' % (page_dir, page_id)
    text_name = u'%s/%s.txt' % (page_dir, page_id)

    image = io.imread(image_name, 0)
    # Read bytes and decode explicitly so the text is always unicode.
    with open(text_name, 'rb') as f:
        text = f.read().decode('utf-8')

    # Run segmentation once; the original ran it twice and discarded the
    # first result.
    total_char_lst = process_page(image, text, page_id)

    # Django must be configured before the model can be imported.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(id=char_id,
                              page_id=page_id,
                              char=ch.char,
                              image=char_id + u'.jpg',
                              left=ch.left,
                              right=ch.right,
                              top=ch.top,
                              bottom=ch.bottom,
                              line_no=ch.line_no,
                              char_no=ch.char_no,
                              is_correct=False)
        print(character)
        character.save()
def test_file():
    """Ad-hoc driver: segment one hard-coded page and save its characters.

    Looks for ``*.txt`` files in the directory of volume ``01`` / page
    ``001``, derives the page id from the first one (its stem minus the last
    character, plus ``'a'``), runs ``process_page`` on the matching image and
    text, and saves one ``Character`` per result.

    The directory is listed with ``glob`` rather than a shell ``cd; ls``
    pipeline, so a missing directory or a directory without ``.txt`` files
    reaches ``sys.exit(-1)`` instead of raising from the subprocess call or
    listing the current working directory.

    NOTE(review): an earlier ``test_file`` is defined in this file; this
    later definition is the one that stays bound after import.

    Returns:
        None.  Exits the process with status -1 when no ``.txt`` file exists.
    """
    import glob
    import os

    vol_no = u'01'
    page_no = u'001'
    page_dir = u'/media/DATA/work/image-preprocessing/binary_image/%s/%s' % (
        vol_no, page_no)

    # Sorted to mirror the alphabetical order ``ls`` produced.
    candidates = glob.glob(os.path.join(page_dir, u'*.txt'))
    txt_names = sorted(os.path.basename(path) for path in candidates)
    if not txt_names:
        sys.exit(-1)

    # Strip '.txt' and the character before it, then append 'a'.
    stem = txt_names[0][:-len(u'.txt')]
    page_id = stem[:-1] + u'a'
    image_name = u'%s/%s.jpg' % (page_dir, page_id)
    text_name = u'%s/%s.txt' % (page_dir, page_id)

    image = io.imread(image_name, 0)
    # Read bytes and decode explicitly so the text is always unicode.
    with open(text_name, 'rb') as f:
        text = f.read().decode('utf-8')

    # Run segmentation once; the original ran it twice and discarded the
    # first result.
    total_char_lst = process_page(image, text, page_id)

    # Django must be configured before the model can be imported.
    import django
    django.setup()
    from segmentation.models import Character

    for ch in total_char_lst:
        char_id = ch.char_id.strip()
        character = Character(id=char_id,
                              page_id=page_id,
                              char=ch.char,
                              image=char_id + u'.jpg',
                              left=ch.left,
                              right=ch.right,
                              top=ch.top,
                              bottom=ch.bottom,
                              line_no=ch.line_no,
                              char_no=ch.char_no,
                              is_correct=False)
        print(character)
        character.save()
def run_seg(request, pk):
    """Run character segmentation for page ``pk`` and persist the result.

    Steps: look the page up (via ``get_object_or_404``), collect the field
    dicts of its regions, call ``charseg``, save one ``Character`` per
    returned entry, queue the offline image-cutting task, and answer with
    the segmentation list as JSON.

    NOTE(review): an earlier ``run_seg`` is defined in this file; this later
    definition is the one that stays bound after import.

    Args:
        request: the incoming request (not read by this function).
        pk: primary key of the ``Page`` to process.

    Returns:
        ``JsonResponse`` containing the list returned by ``charseg``.
    """
    id_template = '{0}L{1:02}R{2:02}C{3:03}'

    page_obj = get_object_or_404(Page, pk=pk)
    path_to_image = page_obj.get_image_path()

    region_records = serializers.serialize(
        "python", Region.objects.filter(page_id=pk))
    region_fields = []
    for record in region_records:
        region_fields.append(record['fields'])

    results = charseg(path_to_image, region_fields, pk)
    for item in results:
        Character(
            id=id_template.format(
                pk, item['line_no'], item['region_no'], item['char_no']),
            page_id=pk,
            char=item['char'],
            left=item['left'],
            right=item['right'],
            top=item['top'],
            bottom=item['bottom'],
            line_no=item['line_no'],
            region_no=item['region_no'],
            char_no=item['char_no'],
            is_correct=-9,  # character image has not been cut yet
        ).save()

    cut_char.delay(pk)  # offline task: cut the character images
    return JsonResponse(results, safe=False)