uidx = 0  # count batch
loss_s = 0.  # count loss
ud_s = 0  # time for training an epoch
validFreq = -1
saveFreq = -1
sampleFreq = -1
dispFreq = 100
if validFreq == -1:
    validFreq = len(train)
if saveFreq == -1:
    saveFreq = len(train)
if sampleFreq == -1:
    sampleFreq = len(train)

# initialize model
WAP_model = Encoder_Decoder(params)
if init_param_flag:
    WAP_model.apply(weight_init)
if multi_gpu_flag:
    WAP_model = nn.DataParallel(WAP_model, device_ids=[0, 1])
WAP_model.cuda()

# print model's parameters
model_params = WAP_model.named_parameters()
for k, v in model_params:
    print(k)

# loss function; reduction='none' keeps the per-token losses so padding can
# be masked out before averaging (the original reduce=False is deprecated)
criterion = torch.nn.CrossEntropyLoss(reduction='none')

# optimizer (the source is cut off after parameters(); the keyword arguments
# below are assumed to come from hyperparameters defined earlier in the script)
optimizer = optim.Adadelta(WAP_model.parameters(),
                           lr=lr, eps=my_eps, weight_decay=decay_c)
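# Hedged sketch (not part of the original script): one parameter update with
# the objects built above. The forward signature and the (L, B, K) score
# layout are assumptions based on typical WAP training code; the masked
# average is the reason the criterion returns per-token losses.
def train_step_sketch(x, x_mask, y, y_mask):
    scores = WAP_model(params, x, x_mask, y, y_mask)  # assumed forward signature
    loss = criterion(scores.contiguous().view(-1, scores.shape[2]),
                     y.contiguous().view(-1))  # per-token cross entropy
    loss = (loss * y_mask.view(-1)).sum() / y_mask.sum()  # ignore padding
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()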
def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    # model architecture
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 111
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 1

    # load model
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary and build the reverse (index -> symbol) mapping
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    # load data
    test, test_uid_list = dataIterator(fea, latex, worddicts, batch_size=8,
                                       batch_Imagesize=500000, maxlen=20000,
                                       maxImagesize=500000)

    # testing
    model.eval()
    with torch.no_grad():
        fpp_sample = open(saveto, 'w')
        test_count_idx = 0
        print('Decoding ... ')
        for x, y in test:
            for xx in x:
                print('%d : %s' % (test_count_idx + 1,
                                   test_uid_list[test_count_idx]))
                xx_pad = xx.astype(np.float32) / 255.
                xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                sample, score = gen_sample(model, xx_pad, params, False,
                                           k=beam_k, maxlen=1000)
                # length-normalize the beam scores and keep the best hypothesis
                score = score / np.array([len(s) for s in sample])
                ss = sample[score.argmin()]
                # write decoding results
                fpp_sample.write(test_uid_list[test_count_idx])
                test_count_idx = test_count_idx + 1
                # symbols (without <eos>)
                for vv in ss:
                    if vv == 0:  # <eos>
                        break
                    fpp_sample.write(' ' + worddicts_r[vv])
                fpp_sample.write('\n')
        fpp_sample.close()
        print('test set decode done')

    # score the decoded results against the ground truth
    os.system('python compute-wer.py ' + saveto + ' ' + latex + ' ' + output)
    with open(output) as fpp:
        stuff = fpp.readlines()
    m = re.search('WER (.*)\n', stuff[0])
    test_per = 100. * float(m.group(1))
    m = re.search('ExpRate (.*)\n', stuff[1])
    test_sacc = 100. * float(m.group(1))
    print('Test WER: %.2f%%, ExpRate: %.2f%%' % (test_per, test_sacc))
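# Hedged usage sketch: how main() above might be invoked. All file paths are
# assumptions for illustration only, not paths from the repository.
def run_decode_example():
    main('./models/WAP_params.pkl', './data/dictionary.txt',
         './data/offline-test.pkl', './data/test_caption.txt',
         './result/test_decode_result.txt', './result/test.wer', beam_k=5)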
def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
    # load text detection net
    net = CRAFT()  # initialize
    print('Loading text detection model from checkpoint {}'.format(
        text_detection_modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(text_detection_modelpara)))
    else:
        net.load_state_dict(
            copyStateDict(torch.load(text_detection_modelpara,
                                     map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    # OCR model architecture
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3
    params['cuda'] = args.cuda

    # load OCR model
    OCR = Encoder_Decoder(params)
    if args.cuda:
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara)))
    else:
        OCR.load_state_dict(
            copyStateDict(torch.load(ocr_modelpara, map_location='cpu')))
    if args.cuda:
        OCR = OCR.cuda()  # DataParallel expects the module on GPU (this call was commented out in the original)
        OCR = torch.nn.DataParallel(OCR)
        cudnn.benchmark = False
    OCR.eval()
    net.eval()

    # load dictionary and build the reverse (index -> symbol) mapping
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    t = time.time()
    fontPIL = '/usr/share/fonts/truetype/fonts-japanese-gothic.ttf'  # Japanese font
    size = 40
    colorBGR = (0, 0, 255)
    paper = ET.Element('paper')
    paper.set('xmlns', "http://codh.rois.ac.jp/modern-magazine/")

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path), end='\r')
        res_img_file = result_folder + "res_" + os.path.basename(image_path)
        # if os.path.exists(res_img_file): continue  # optional: skip finished images

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h, w = image.shape[0], image.shape[1]
        print(image_path)
        page = ET.SubElement(paper, "page")
        page.set('file', os.path.basename(image_path).replace('.jpg', ''))
        page.set('height', str(h))
        page.set('width', str(w))
        page.set('dpi', str(100))
        page.set('number', str(1))

        # detect text lines
        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold, args.low_text,
                                             args.cuda, args.poly)

        text = []
        locations = []
        for i, box in enumerate(bboxes):
            # detected quadrilateral -> axis-aligned box, clamped to the image
            poly = np.array(box).astype(np.int32)
            min_x = max(np.min(poly[:, 0]), 0)
            max_x = np.max(poly[:, 0])
            min_y = max(np.min(poly[:, 1]), 0)
            max_y = np.max(poly[:, 1])
            input_img = image[min_y:max_y, min_x:max_x]
            w = max_x - min_x + 1
            h = max_y - min_y + 1
            line = ET.SubElement(page, "line")
            line.set("x", str(min_x))
            line.set("y", str(min_y))
            line.set("height", str(h))
            line.set("width", str(w))
            # scale the shorter side to 20 px and round the longer side to a
            # multiple of 20 so the recognizer sees a normalized strip
            if w < h:
                rate = 20.0 / w
                w = int(round(w * rate))
                h = int(round(h * rate / 20.0) * 20)
            else:
                rate = 20.0 / h
                w = int(round(w * rate / 20.0) * 20)
                h = int(round(h * rate))
            input_img = cv2.resize(input_img, (w, h))

            # RGB crop -> single grayscale channel
            mat = np.zeros([1, h, w], dtype='uint8')
            mat[0, :, :] = (0.299 * input_img[:, :, 0]
                            + 0.587 * input_img[:, :, 1]
                            + 0.114 * input_img[:, :, 2])
            xx_pad = mat.astype(np.float32) / 255.
            xx_pad = torch.from_numpy(xx_pad[None, :, :, :])  # (1,1,H,W)
            if args.cuda:
                xx_pad = xx_pad.cuda()  # .cuda() is not in-place; the result must be reassigned
            with torch.no_grad():
                sample, score, alpha_past_list = gen_sample(OCR, xx_pad, params,
                                                            args.cuda, k=10,
                                                            maxlen=600)

            # length-normalize the beam scores and keep the best hypothesis
            score = score / np.array([len(s) for s in sample])
            ss = sample[score.argmin()]
            alpha_past = alpha_past_list[score.argmin()]
            result = ''
            i = 0
            location = []
            for vv in ss:
                if vv == 0:  # <eol>
                    break
                # per-step attention is the difference of cumulative maps
                alpha = alpha_past[i]
                if i != 0:
                    alpha = alpha_past[i] - alpha_past[i - 1]
                (y, x) = np.unravel_index(np.argmax(alpha, axis=None),
                                          alpha.shape)
                # map the 16x-downsampled attention peak back to page coordinates
                location.append([int(16 * x / rate) + min_x,
                                 int(16 * y / rate) + min_y])
                # dictionary entries are hexadecimal Unicode code points
                result += chr(int(worddicts_r[vv], 16))
                i += 1
            line.text = result
            text.append(result)
            locations.append(location)
            image = cv2_putText_1(img=image, text=result,
                                  org=(min_x, max_x, min_y, max_y),
                                  fontFace=fontPIL, fontScale=size,
                                  color=colorBGR)
        print('save image')
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        # cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image, polys, dirname=result_folder)

    # write the page/line structure as Shift_JIS XML
    xml_string = ET.tostring(paper, 'Shift_JIS')
    fout = codecs.open('./data/result.xml', 'w', 'shift_jis')
    fout.write(xml_string.decode('shift_jis'))
    fout.close()
    print("elapsed time : {}s".format(time.time() - t))
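# Hedged illustration (not from the source): how the decoder's attention peak
# is mapped back to page coordinates in the loop above. The encoder
# downsamples the input by 16x, so a peak at grid cell (y, x) corresponds to
# roughly (16*x, 16*y) in the resized crop; dividing by `rate` undoes the
# resize and adding the box origin places it on the page.
def attention_peak_to_page(alpha, rate, min_x, min_y):
    (y, x) = np.unravel_index(np.argmax(alpha, axis=None), alpha.shape)
    return (int(16 * x / rate) + min_x, int(16 * y / rate) + min_y)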
def main(model_path, dictionary_target, dictionary_retarget, fea, output_path,
         k=5):
    # set parameters
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 106
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 1
    params['Kre'] = 7
    params['mre'] = 256
    maxlen = 300
    params['maxlen'] = maxlen

    # load model
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    # enable CUDA
    model.cuda()

    # load source dictionary and invert it
    worddicts = load_dict(dictionary_target)
    print('total chars', len(worddicts))
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    # load relation dictionary and invert it
    reworddicts = load_dict(dictionary_retarget)
    print('total relations', len(reworddicts))
    reworddicts_r = [None] * len(reworddicts)
    for kk, vv in reworddicts.items():
        reworddicts_r[vv] = kk

    valid, valid_uid_list = dataIterator_test(fea, worddicts, reworddicts,
                                              batch_size=8,
                                              batch_Imagesize=800000,
                                              maxImagesize=800000)

    # change model's mode to eval
    model.eval()

    valid_out_path = output_path + 'symbol_relation/'
    valid_malpha_path = output_path + 'memory_alpha/'
    if not os.path.exists(valid_out_path):
        os.mkdir(valid_out_path)
    if not os.path.exists(valid_malpha_path):
        os.mkdir(valid_malpha_path)

    valid_count_idx = 0
    print('Decoding ... ')
    ud_epoch = time.time()
    with torch.no_grad():
        for x in valid:
            for xx in x:  # xx: one sample of the current batch (numpy array)
                print('%d : %s' % (valid_count_idx + 1,
                                   valid_uid_list[valid_count_idx]))
                xx_pad = np.zeros((xx.shape[0], xx.shape[1], xx.shape[2]),
                                  dtype='float32')  # (1,height,width)
                xx_pad[:, :, :] = xx / 255.
                xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).cuda()
                score, sample, malpha_list, relation_sample = \
                    gen_sample(model, xx_pad, params, gpu_flag=False, k=k,
                               maxlen=maxlen)
                if len(score) != 0:
                    # length-normalize beam scores and keep the best hypothesis
                    score = score / np.array([len(s) for s in sample])
                    min_score_index = score.argmin()
                    ss = sample[min_score_index]
                    rs = relation_sample[min_score_index]
                    mali = malpha_list[min_score_index]
                    fpp_sample = open(valid_out_path
                                      + valid_uid_list[valid_count_idx]
                                      + '.txt', 'w')
                    file_malpha_sample = (valid_malpha_path
                                          + valid_uid_list[valid_count_idx]
                                          + '_malpha.txt')
                    # write one "<symbol>\t<relation>" pair per line
                    for i, [vv, rv] in enumerate(zip(ss, rs)):
                        if vv == 0:
                            string = worddicts_r[vv] + '\tEnd\n'
                            fpp_sample.write(string)
                            break
                        else:
                            if i == 0:
                                string = worddicts_r[vv] + '\tStart\n'
                            else:
                                string = (worddicts_r[vv] + '\t'
                                          + reworddicts_r[rv] + '\n')
                            fpp_sample.write(string)
                    np.savetxt(file_malpha_sample, np.array(mali))
                    fpp_sample.close()
                valid_count_idx = valid_count_idx + 1
    print('test set decode done')
    ud_epoch = (time.time() - ud_epoch) / 60.
    print('epoch cost time ... ', ud_epoch, 'min')
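# Hedged sketch: reading back one of the symbol/relation files written above.
# Each line is "<symbol>\t<relation>"; the first pair carries 'Start' and the
# <eos> line carries 'End'. The helper name is illustrative, not from the repo.
def read_symbol_relation(path):
    pairs = []
    with open(path) as f:
        for line in f:
            symbol, relation = line.rstrip('\n').split('\t')
            pairs.append((symbol, relation))
    return pairs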
def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    # model architecture
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3

    # load model
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary and build the reverse (index -> symbol) mapping
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    start_time = time.time()
    channels = 1
    folder = './kokumin/'
    out = './kokuminOut/'
    index = 0

    # testing
    model.eval()
    with torch.no_grad():
        for img_file in os.listdir(folder):
            if '.jpg' not in img_file:
                continue
            label_file = folder + 'res_' + img_file.replace('jpg', 'txt')
            if not os.path.isfile(label_file):
                continue
            out_file = out + img_file
            out_txtfile = out + img_file.replace('jpg', 'txt')
            img_file = folder + img_file
            im = imread(img_file)  # scipy's imread/imresize assumed imported at module level
            arr = Image.fromarray(im).convert('RGB')
            draw = ImageDraw.Draw(arr)
            with open(label_file) as f:
                BBs = f.readlines()
            BBs = [x.strip().split(',') for x in BBs]
            f = open(out_txtfile, 'w')
            for BB in BBs:
                # quadrilateral corners -> axis-aligned box, clamped to the image
                x1 = max(min(int(BB[0]), int(BB[2]), int(BB[4]), int(BB[6])), 0)
                y1 = max(min(int(BB[1]), int(BB[3]), int(BB[5]), int(BB[7])), 0)
                x2 = max(int(BB[0]), int(BB[2]), int(BB[4]), int(BB[6]))
                y2 = max(int(BB[1]), int(BB[3]), int(BB[5]), int(BB[7]))
                draw.rectangle((x1, y1, x2, y2), fill=None, outline=(255, 0, 0))
                f.write(str(x1) + ',' + str(y1) + ','
                        + str(x2) + ',' + str(y2) + ',')
                input_img = im[y1:y2, x1:x2]
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                # scale the shorter side to 20 px and round the longer side
                # to a multiple of 20
                if w < h:
                    rate = 20.0 / w
                    w = int(round(w * rate))
                    h = int(round(h * rate / 20.0) * 20)
                else:
                    rate = 20.0 / h
                    w = int(round(w * rate / 20.0) * 20)
                    h = int(round(h * rate))
                input_img = imresize(input_img, (h, w))
                mat = np.zeros([channels, h, w], dtype='uint8')
                mat[0, :, :] = input_img
                # mat[0, :, :] = 0.299 * input_img[:, :, 0] + 0.587 * input_img[:, :, 1] + 0.114 * input_img[:, :, 2]  # alternative for RGB inputs
                xx_pad = mat.astype(np.float32) / 255.
                xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                sample, score, alpha_past_list = gen_sample(model, xx_pad,
                                                            params, False,
                                                            k=beam_k,
                                                            maxlen=600)
                # length-normalize the beam scores and keep the best hypothesis
                score = score / np.array([len(s) for s in sample])
                ss = sample[score.argmin()]
                result = ''
                for vv in ss:
                    if vv == 0:  # <eol>
                        break
                    result += worddicts_r[vv] + ' '
                print('result:', index, result)
                index += 1
                f.write(result + '\n')
            f.close()
            arr.save(out_file, "JPEG")
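# Hedged illustration of the crop normalization used above: the shorter side
# is scaled to 20 px and the longer side is rounded to a multiple of 20, so
# e.g. a 50x130 box becomes 20x60 (rate 0.4). Helper name is illustrative.
def normalize_box_size(w, h):
    if w < h:
        rate = 20.0 / w
        return int(round(w * rate)), int(round(h * rate / 20.0) * 20), rate
    rate = 20.0 / h
    return int(round(w * rate / 20.0) * 20), int(round(h * rate)), rate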
KL_loss_s = 0.  # count KL loss
loss_s = 0.  # count loss
ud_s = 0  # time for training an epoch
validFreq = -1
saveFreq = -1
sampleFreq = -1
dispFreq = 100
if validFreq == -1:
    validFreq = len(train)
if saveFreq == -1:
    saveFreq = len(train)
if sampleFreq == -1:
    sampleFreq = len(train)

# initialize model
WAP_model = Encoder_Decoder(params)
if init_param_flag:
    WAP_model.apply(weight_init)
if multi_gpu_flag:
    WAP_model = nn.DataParallel(WAP_model, device_ids=[0, 1, 2, 3])
if reload_flag:
    WAP_model.load_state_dict(
        torch.load(saveto, map_location=lambda storage, loc: storage))
WAP_model.cuda()

# print model's parameters
model_params = WAP_model.named_parameters()
for k, v in model_params:
    print(k)

# loss function
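# Hedged note: the source is truncated at this point. By analogy with the
# training setup earlier in this section, the loss is presumably a per-token
# cross entropy that is masked before averaging, e.g.
#   criterion = torch.nn.CrossEntropyLoss(reduction='none')
# with the KL term accumulated in KL_loss_s added on top of it.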