def __init__(self, images_path, labels_path, image_size):
    """Collect image/label file lists and build the Gaussian heat-map generator.

    Args:
        images_path: directory scanned for training images.
        labels_path: directory scanned for label files.
        image_size: target size used later by the dataset pipeline.
    """
    self.image_size = image_size
    # file_utils.get_files returns a 4-tuple; keep only the slot needed here.
    self.image_list, _, _, _ = file_utils.get_files(images_path)
    _, _, self.label_list, _ = file_utils.get_files(labels_path)
    # 1024 is the canvas size of the pre-rendered Gaussian template.
    self.gaussian_generator = GenerateGaussian(1024,
                                               opt.gaussian_region,
                                               opt.gaussian_affinity)
def eval2015(net, test_folder, result_folder, text_threshold=0.7,
             link_threshold=0.4, low_text=0.4):
    """Run ICDAR-2015 style evaluation over every image in *test_folder*.

    Saves the score-text mask per image under ``result_folder/mask`` and the
    detected polygons under ``result_folder/gt``, then calls ``eval_2015``.

    Args:
        net: detection network passed straight to ``test_net``.
        test_folder: directory containing the test images.
        result_folder: root directory for masks / gt results.
        text_threshold, link_threshold, low_text: detector thresholds.
    """
    image_list, _, _ = file_utils.get_files(test_folder)
    t = time.time()
    res_gt_folder = os.path.join(result_folder, 'gt')
    res_mask_folder = os.path.join(result_folder, 'mask')
    # FIX: make sure the output directories actually exist before writing.
    os.makedirs(res_gt_folder, exist_ok=True)
    os.makedirs(res_mask_folder, exist_ok=True)
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path), end='\n')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, text_threshold,
                                             link_threshold, low_text,
                                             True, False, 2240, 1.5, False)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        # FIX: the original joined with "/res_..." — an absolute second
        # component makes os.path.join DISCARD res_mask_folder entirely.
        mask_file = os.path.join(res_mask_folder, "res_" + filename + '_mask.jpg')
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult15(image_path, polys, dirname=res_gt_folder)
    eval_2015(os.path.join(result_folder, 'gt'))
    print("elapsed time : {}s".format(time.time() - t))
def PredictDetection(args, net, image_path, opt, reco):
    """Detect text on a single image and save the annotated result.

    Returns whatever ``file_utils.saveResult`` returns (the saved file handle
    or path, per the project's convention).
    """
    image_list, _, _ = file_utils.get_files(args.test_folder)
    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)
    started = time.time()
    # load + detect
    image = imgproc.loadImage(image_path)
    bboxes, score_text = test_net(net, image, args.text_threshold,
                                  args.link_threshold, args.low_text,
                                  args.cuda, args)
    # persist detection overlay (BGR->RGB flip via ::-1 on the channel axis)
    fl = file_utils.saveResult(image_path, image[:, :, ::-1], bboxes, opt,
                               reco, dirname=result_folder)
    print("elapsed time detecting : {}s".format(time.time() - started))
    log.info(f'elapsed time detecting : {time.time() - started}s')
    return fl
def test_net(model=None, mapper=None, spaces=None, load_from=None, save_to=None):
    """Classify every character image under *load_from* and emit the text.

    Each file name's leading number (before the first '_') identifies the
    source image the character came from; results are written via
    ``ltr_utils.display_stdout`` in 'file' mode.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        img_lists, _, _, name_list = file_utils.get_files(load_from)
        image_name_nums = [name.split('_')[0] for name in name_list]
        res = []
        for in_path in img_lists:
            # pre-process: load -> grayscale -> tensor sized for the recognizer
            image = imgproc.loadImage(in_path)
            image = imgproc.cvtColorGray(image)
            image = imgproc.tranformToTensor(image,
                                             opt.RECOG_TRAIN_SIZE).unsqueeze(0)
            image = image.to(device)
            y = model(image)
            _, pred = torch.max(y.data, 1)
            res.append(mapper[0][pred])
        # method for saving result, MODE: file | stdout | all
        ltr_utils.display_stdout(chars=res, space=spaces,
                                 img_name=image_name_nums, MODE='file',
                                 save_to=save_to)
def infer_detection(impath, net, refine_net, args):  # CRAFT
    """Run CRAFT detection over every image under *impath* and dump the
    per-image boxes with their detection confidences to ./Results/data.json.

    Args:
        impath: root folder of test images.
        net: CRAFT detector.
        refine_net: optional link-refiner network (may be None).
        args: namespace with thresholds / cuda / poly flags.
    """
    image_list, _, _ = file_utils.get_files(impath)
    start = impath  # base for relative image names used as JSON keys
    result_folder = './Results/'
    data = {}
    t = time.time()
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path), end='\r')
        image = imgproc.loadImage(image_path)
        image_name = os.path.relpath(image_path, start)
        bboxes, polys, score_text, det_scores = test.test_net(
            net, image, args.text_threshold, args.link_threshold,
            args.low_text, args.cuda, args.poly, args, refine_net)
        # serialise every box with its confidence, keyed by box index
        bbox_score = {}
        for index, (box, conf) in enumerate(zip(bboxes, det_scores)):
            bbox_score[str(index)] = {
                'detconf': str(conf),
                'box': [[str(coors[0]), str(coors[1])] for coors in box],
            }
        data[image_name] = bbox_score
    # FIX: use makedirs(exist_ok=True) on the single result_folder constant
    # instead of a duplicated './Results' literal + isdir check.
    os.makedirs(result_folder, exist_ok=True)
    # FIX: dropped the redundant jsonfile.close() — the with-statement
    # already closes the file on exit.
    with open(os.path.join(result_folder, 'data.json'), 'w') as jsonfile:
        json.dump(data, jsonfile)
    print("elapsed time : {}s".format(time.time() - t))
def main(args, logger=None):
    """Load a CRAFT checkpoint, detect text on every image in args.img_path,
    save masks/estimates under args.rst_path, then run the dataset evaluation.
    """
    # build + load the detector
    net = CRAFT(pretrained=False)
    print('Loading weights from checkpoint {}'.format(args.model_path))
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.model_path)))
    else:
        net.load_state_dict(
            copyStateDict(torch.load(args.model_path, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    t = time.time()
    """ For test images in a folder """
    image_list, _, _ = file_utils.get_files(args.img_path)
    est_folder = os.path.join(args.rst_path, 'est')
    mask_folder = os.path.join(args.rst_path, 'mask')
    eval_folder = os.path.join(args.rst_path, 'eval')
    for folder in (est_folder, mask_folder, eval_folder):
        cg.folder_exists(folder, create_=True)
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path))
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(
            net, image,
            text_threshold=args.text_threshold,
            link_threshold=args.link_threshold,
            low_text=args.low_text,
            cuda=args.cuda,
            canvas_size=args.canvas_size,
            mag_ratio=args.mag_ratio,
            poly=args.poly,
            show_time=args.show_time)
        # save score text (only once per image)
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = mask_folder + "/res_" + filename + '_mask.jpg'
        if not cg.file_exists(mask_file):
            cv2.imwrite(mask_file, score_text)
        file_utils.saveResult15(image_path, bboxes, dirname=est_folder,
                                mode='test')
    eval_dataset(est_folder=est_folder, gt_folder=args.gt_path,
                 eval_folder=eval_folder, dataset_type=args.dataset_type)
    print("elapsed time : {}s".format(time.time() - t))
def test():
    """Detect cuts, speech bubbles and texts on every test image with a
    pretrained Faster R-CNN and save each cropped region to its own folder."""
    np.random.seed(config.RNG_SEED)
    labels = np.asarray(['__background__', 'speech'])
    # INITIALIZE MODEL AND LOAD PRETRAINED MODEL
    # backbone depth: ResNet-152 when configured, ResNet-101 otherwise
    layerNum = 152 if config.BACKBONE == 'res152' else 101
    fasterRCNN = resnet(labels, layerNum, pretrained=False,
                        class_agnostic=False)
    fasterRCNN.create_architecture()
    print('Loading model from defined path :' + config.PRETRAINED_MODEL_PATH)
    if config.cuda:
        model = torch.load(config.PRETRAINED_MODEL_PATH)
    else:
        model = torch.load(config.PRETRAINED_MODEL_PATH,
                           map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(model['model'])
    if 'pooling_mode' in model.keys():
        cfg.POOLING_MODE = model['pooling_mode']
    if config.cuda:
        fasterRCNN.cuda()
        fasterRCNN = torch.nn.DataParallel(fasterRCNN)
    fasterRCNN.eval()
    t = time.time()
    with torch.no_grad():
        # reusable input buffers handed to the network on every iteration
        im_data = Variable(torch.FloatTensor(1).cuda())
        im_info = Variable(torch.FloatTensor(1).cuda())
        num_boxes = Variable(torch.LongTensor(1).cuda())
        gt_boxes = Variable(torch.FloatTensor(1).cuda())
        items = [im_data, im_info, num_boxes, gt_boxes]
        # LIST IMAGE FILE
        img_list, _, _ = file_utils.get_files(config.TEST_IMAGE_PATH)
        # KICK OFF TEST PROCESS
        for i, img in enumerate(img_list):
            sys.stdout.write('TEST IMAGES: {:d}/{:d}: {:s} \r'.format(
                i + 1, len(img_list), img))
            sys.stdout.flush()
            # LOAD IMAGE
            img = imgproc.loadImage(img)
            img_blob, img_scales = imgproc.getImageBlob(img)
            # PASS THE TEST MODEL AND PREDICT BELOW IM RESULTS
            alpha_img, vis_img, cuts, bubbles, texts = test_net(
                fasterRCNN, img, img_blob, img_scales, items, labels, i)
            fixed_i = file_utils.resultNameNumbering(origin=i,
                                                     digit=len(img_list))
            for cut_idx, cut in enumerate(cuts):
                file_utils.saveImage(dir=config.CUT_PATH, img=cut,
                                     index1=fixed_i, index2=cut_idx,
                                     ext='.png')
            for bub_idx, bubble in enumerate(bubbles):
                file_utils.saveImage(dir=config.BUBBLE_PATH, img=bubble,
                                     index1=fixed_i, index2=bub_idx,
                                     ext='.png')
            for txt_idx, txt in enumerate(texts):
                file_utils.saveImage(dir=config.TEXT_PATH, img=txt,
                                     index1=fixed_i, index2=txt_idx,
                                     ext='.png')
            file_utils.saveImage(dir=config.FINAL_IMAGE_PATH, img=vis_img,
                                 index1=fixed_i, ext='.jpg')
    print("TOTAL TIME : {}s".format(time.time() - t))
start_idx = 1 else: start_idx = 0 new_state_dict = OrderedDict() for k, v in state_dict.items(): name = ".".join(k.split(".")[start_idx:]) new_state_dict[name] = v return new_state_dict def str2bool(v): return v.lower() in ("yes", "y", "true", "t", "1") """ For test images in a folder """ image_list, _, _ = file_utils.get_files('test/pics') #! modified path result_folder = './result_bit' #!modified path if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly): t0 = time.time() # resize img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio) ratio_h = ratio_w = 1 / target_ratio # preprocessing x = imgproc.normalizeMeanVariance(img_resized) x = torch.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w]
parser.add_argument('--test_folder', default='/data/', type=str, help='folder path to input images') parser.add_argument('--refine', default=False, action='store_true', help='enable link refiner') parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model') args = parser.parse_args() """ For test images in a folder """ image_list, _, _ = file_utils.get_files(args.input_folder) os.makedirs(args.output_folder, exist_ok=True) if args.debug: os.makedirs(os.path.join(args.output_folder, 'debug'), exist_ok=True) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None): t0 = time.time()
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net = CRAFT() # Initialize net net.load_state_dict( craft_utils.copyStateDict( torch.load(os.getcwd() + '\\craft_mlt_25k.pth', map_location='cpu'))) # load pretrained weights net.eval() print("The required neural network has been successfully loaded...") # Set tessaract path pytesseract.pytesseract.tesseract_cmd = td # Load paths for all images in Input folder image_list, _, _ = file_utils.get_files(wd + '\\Input') print("All file names has been obtained...") # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # ++++++++++++++++++++ Function section +++++++++++++++++++ # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # Find coordinates of boxes where is a text def get_boxes(img_c): # Resize img_r, target_ratio, size_heatmap = imgproc.resize_aspect_ratio( img_c, square_size=square_size, interpolation=cv2.INTER_LINEAR,
CONFIG = { 'trained_model': 'weights/craft_mlt_25k.pth', 'text_threshold': 0.7, 'low_text': 0.4, 'link_threshold': 0.4, 'cuda': False, 'canvas_size': 1280, 'mag_ratio': 1.5, 'poly': False, 'show_time': False, 'test_folder': 'data/', 'refine': False, 'refiner_model': 'weights/craft_refiner_CTW1500.pth' } """ For test images in a folder """ image_list, _, _ = file_utils.get_files(CONFIG['test_folder']) result_folder = './result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None): t0 = time.time()
def main(source, target):
    """Detect text with CRAFT (optionally refined) on every image in *source*
    and write score masks plus detection results into *target*."""
    image_list, _, _ = file_utils.get_files(source)
    result_folder = target
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)
    # detector
    net = CRAFT()
    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))
    else:
        net.load_state_dict(
            copyStateDict(torch.load(args.trained_model, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    # optional LinkRefiner
    refine_net = None
    if args.refine:
        from refinenet import RefineNet
        refine_net = RefineNet()
        print('Loading weights of refiner from checkpoint (' +
              args.refiner_model + ')')
        if args.cuda:
            refine_net.load_state_dict(
                copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(
                copyStateDict(
                    torch.load(args.refiner_model, map_location='cpu')))
        refine_net.eval()
        args.poly = True  # refiner produces polygon output
    t = time.time()
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path), end='\r')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold,
                                             args.low_text, args.cuda,
                                             args.poly, refine_net)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys,
                              dirname=result_folder)
    print("elapsed time : {}s".format(time.time() - t))
import file_utils import os import tensorflow as tf import time import cv2 import numpy as np import imgproc import craft_utils from craft_net import CRAFT result_folder = './synth_result/' """ For test images in a folder """ image_list_ic15, _, _ = file_utils.get_files('./eval_data_ic15/') image_list_ours, _, _ = file_utils.get_files('./choice/') if not os.path.isdir(result_folder): os.mkdir(result_folder) canvas_size = int(2240) mag_ratio = float(2) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,filename,result_folder=result_folder): t0 = time.time() img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio) ratio_h = ratio_w = 1 / target_ratio # preprocessing x = imgproc.normalizeMeanVariance(img_resized)
def show_image(data_dir, class_id, image_id):
    """Display the *image_id*-th image of class *class_id* from *data_dir*."""
    name = get_files(data_dir, class_id)[image_id]
    path = get_file(data_dir, class_id, name)
    io.imshow(io.imread(path))
def __init__(self, test_folder, canvas_size, mag_ratio):
    """Remember the sizing options and enumerate the images to test."""
    self.canvas_size = canvas_size
    self.mag_ratio = mag_ratio
    # only the first slot of get_files' 3-tuple is needed here
    self.image_list, _, _ = file_utils.get_files(test_folder)
def createDataset(args):
    """Generate the character-recognition training set.

    Renders every label with every font at MORPH_NUM erode/dilate variants,
    optionally adds salt-and-pepper / chunk-noise copies, and appends custom
    webtoon crops. Images are numbered by a running counter *cnt* and listed
    in a CSV next to them.
    """
    file_utils.rm_all_dir(dir=opt.RECOGNITION_TRAIN_IMAGE_PATH)
    file_utils.mkdir(dir=[opt.RECOGNITION_TRAIN_IMAGE_PATH])
    with codecs.open('./labels-2213.txt', 'r', encoding='utf-8') as f:
        labels = f.read().strip('\ufeff').splitlines()
    FONTS_PATH = opt.RECOGNITIOON_FONT_PATH
    CSV_PATH = opt.RECOGNITION_CSV_PATH
    IMAGE_PATH = opt.RECOGNITION_TRAIN_IMAGE_PATH
    fonts = glob.glob(os.path.join(FONTS_PATH, '*.ttf'))
    labels_csv = codecs.open(os.path.join(CSV_PATH), 'w', encoding='utf-8')
    print("[THE NUMBER OF FONTS : {}]".format(len(fonts)))
    cnt = 0
    prev_cnt = 0
    # each noise option yields one extra image per rendered base image
    total = opt.NUM_CLASSES * len(fonts) * opt.MORPH_NUM
    if args.salt_pepper:
        total *= 2
    if args.chunk_noise:
        total *= 2
    for k, character in enumerate(labels):
        if cnt - prev_cnt > 5000:  # progress report every ~5000 images
            prev_cnt = cnt
            sys.stdout.write(
                'TRAINING IMAGE GENERATION: ({}/{}) \r'.format(cnt, total))
            sys.stdout.flush()
        for font_file in fonts:  # renamed from `f` (shadowed the file handle)
            for v in range(opt.MORPH_NUM):
                image, drawing = make_canvas(width=opt.RECOG_IMAGE_WIDTH,
                                             height=opt.RECOG_IMAGE_HEIGHT,
                                             color=opt.RECOG_BACKGROUND)
                font_type = determine_font_size(font=font_file,
                                                size=opt.RECOG_FONT_SIZE)
                w, h = determine_canvas_size(canvas=drawing, label=character,
                                             font=font_type)
                make_letter(canvas=drawing, label=character, width=w,
                            height=h, color=opt.RECOG_FONT_COLOR,
                            font=font_type)
                # alternate erode / dilate to vary stroke thickness
                morph_templete = np.array(image.copy())
                kernel = np.ones((2, 2), np.uint8)
                if v == 1:
                    morph_templete = cv2.erode(morph_templete, kernel,
                                               iterations=1)
                else:
                    morph_templete = cv2.dilate(morph_templete, kernel,
                                                iterations=1)
                copy = morph_templete.copy()
                cnt += 1
                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=np.array(copy),
                                     index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv,
                                   index=cnt, label=character, num=k,
                                   ext='.png')
                if args.salt_pepper:
                    # FIX: was `cnt += 11`, skipping 10 indices per image and
                    # breaking the `total` accounting — TODO confirm intent.
                    cnt += 1
                    copy = generate_salt_and_pepper_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy,
                                         index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv,
                                       index=cnt, label=character, num=k,
                                       ext='.png')
                if args.chunk_noise:
                    # FIX: original reused the previous cnt here, overwriting
                    # the image just saved — TODO confirm intent.
                    cnt += 1
                    copy = generate_chunk_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy,
                                         index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv,
                                       index=cnt, label=character, num=k,
                                       ext='.png')
    # added custom training data difficult to classify from webtoon
    if args.webtoon_data:
        tranfer_img_list, _, _, _ = file_utils.get_files(
            opt.RECOG_WEBTOON_TRAIN_DATA_PATH)
        label_mapper = file_utils.makeLabelMapper('./labels-2213.txt')
        test_txt = []
        test_num = []
        print("[CUSTOM HANGUL DIFFICULT DATASET GENERATION : {}]".format(
            len(tranfer_img_list)))
        text_labels = file_utils.loadText(opt.RECOG_WEBTOON_TRAIN_LABEL_PATH)
        for txt in text_labels[0]:
            test_num.append(label_mapper[0].tolist().index(txt))
            test_txt.append(txt)
        for idx, in_path in enumerate(tranfer_img_list):
            k, character = test_num[idx], test_txt[idx]
            img = imgproc.loadImage(in_path)
            img = imgproc.cvtColorGray(img)
            # (removed the pointless single-iteration `for x in range(1)` loop)
            copy = img.copy()
            cnt += 1
            copy = Image.fromarray(np.array(copy))
            file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt,
                                 ext='.png')
            file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt,
                               label=character, num=k, ext='.png')
    labels_csv.close()
def resume(track_paths):
    """Return the subset of *track_paths* with no spectrogram yet.

    A track is considered done when its base file name (extension stripped)
    appears in *spectrograms_directory*.
    """
    completed_track_paths = utils.get_file_names(spectrograms_directory)
    print('Previously completed spectrograms: ' +
          str(len(completed_track_paths)))
    # set gives O(1) membership; the original list made the filter O(n*m)
    completed = set(completed_track_paths)
    return [
        path for path in track_paths
        if os.path.splitext(os.path.basename(path))[0] not in completed
    ]


if __name__ == '__main__':
    print('Getting music file paths...')
    track_paths = utils.get_files("Q:\\fma_full")
    print('Music files identified: ' + str(len(track_paths)))
    print('Checking previously created spectrograms...')
    track_paths = resume(track_paths)
    print('Number of spectrograms remaining to create: ' +
          str(len(track_paths)))
    # Pool as a context manager: __exit__ calls terminate(), matching the
    # original explicit pool.terminate().
    with Pool(os.cpu_count()) as pool:
        pool.map(generate_spectrogram, track_paths)
parser.add_argument('--ratio', default=2.0, type=float, help='height & width ratio of demo image') parser.add_argument('--demo_folder', default='./data/', type=str, help='folder path to demo images') parser.add_argument('--cuda', action='store_true', default=True, help='use cuda for inference') args = parser.parse_args() """ For test images in a folder """ image_list, _, _, name_list = file_utils.get_files(args.demo_folder) file_utils.rm_all_dir(dir='./result/') # clean directories for next test file_utils.mkdir(dir=[ './result/', './result/bubbles/', './result/cuts/', './result/demo/', './result/chars/' ]) # load net models = net_utils.load_net(args) # initialize and load weights spaces = [] # text recognition spacing word text_warp_items = [] # text to warp bubble image demos = [] # all demo image storage t = time.time()
if refine: from refinenet import RefineNet refine_net = RefineNet() print('Loading weights of refiner from checkpoint (' + refiner_model + ')') if cuda: refine_net.load_state_dict(copyStateDict(torch.load(refiner_model))) refine_net = refine_net.cuda() refine_net = torch.nn.DataParallel(refine_net) else: refine_net.load_state_dict( copyStateDict(torch.load(refiner_model, map_location='cpu'))) refine_net.eval() args.poly = True """ For test images in a folder """ image_list, _, _ = file_utils.get_files("data/") def detect(image_path): result_folder = "result/" + str(time.time()) + "/" if not os.path.isdir(result_folder): os.mkdir(result_folder) image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(result_folder, net, image, text_threshold, link_threshold, low_text, cuda, args.poly, refine_net) return result_folder detect("data/plat1.png")
parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained model') parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold') parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score') parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold') parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') parser.add_argument('--canvas_size', default=2240, type=int, help='image size for inference') parser.add_argument('--mag_ratio', default=2, type=float, help='image magnification ratio') parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type') parser.add_argument('--show_time', default=False, action='store_true', help='show processing time') parser.add_argument('--test_folder', default='/data/', type=str, help='folder path to input images') args = parser.parse_args() """ For test images in a folder """ image_list, _, _ = file_utils.get_files('/data/CRAFT-pytorch/test') result_folder = '/data/CRAFT-pytorch/result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly): t0 = time.time() # resize img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio) ratio_h = ratio_w = 1 / target_ratio # preprocessing x = imgproc.normalizeMeanVariance(img_resized) x = torch.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w]
def train(args):
    """Train the speech-bubble Faster R-CNN detector on a VOC-2007-style set.

    Builds the roidb from ./train/images/, trains a ResNet-101 backbone for
    args.epoch epochs and checkpoints the model once per epoch.
    """
    file_utils.rm_all_dir(dir='./train/cache/')  # clean cache
    dataset_name = "voc_2007_trainval"
    args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
    args.cfg_file = "cfgs/{}_ls.yml".format(args.backbone) if args.large_scale else "cfgs/{}.yml".format(args.backbone)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = opt.BUBBLE_TRAIN_FLIP
    cfg.USE_GPU_NMS = opt.cuda
    # build the train index file expected by combined_roidb
    _, _, _, name_lists = file_utils.get_files('./train/images/')
    file_utils.makeTrainIndex(names=name_lists, save_to='./train/trainval.txt')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(dataset_name)
    train_size = len(roidb)
    print('TRAIN IMAGE NUM: {:d}'.format(len(roidb)))
    file_utils.mkdir(dir=[args.save_models])
    sampler_batch = sampler(train_size, args.batch)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch,\
                             imdb.num_classes, training=True)
    dataloader = DataLoader(dataset, batch_size=args.batch, sampler=sampler_batch, num_workers=args.num_workers)
    # reusable input buffers, resized+filled per batch below
    im_data = Variable(torch.FloatTensor(1).cuda())
    im_info = Variable(torch.FloatTensor(1).cuda())
    num_boxes = Variable(torch.LongTensor(1).cuda())
    gt_boxes = Variable(torch.FloatTensor(1).cuda())
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()
    lr = args.lr
    # per-parameter groups: biases may get doubled lr / no weight decay
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),\
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]
    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    if opt.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
    if args.resume:
        # resume restores model+optimizer state and the saved lr
        load_name = os.path.join(args.save_models, 'Speech-Bubble-Detector-{}-{}-{}.pth'.format(args.backbone, args.resume_epoch, args.resume_batch))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
    if args.multi_gpus:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    iters_per_epoch = int(train_size / args.batch)
    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")
    args.max_epochs = args.epoch
    for epoch in range(1, args.epoch + 1):
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            # copy this batch into the reusable buffers
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # total loss = RPN cls/box + RCNN cls/box terms
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.item()
            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.backbone == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()
            if step % args.display_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= (args.display_interval + 1)
                # under DataParallel losses are per-GPU tensors: mean() first
                if args.multi_gpus:
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[epoch %d][iter %d/%d] loss: %.4f, lr: %.2e" \
                      % (epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    logger.add_scalars("logs_s_{}/losses".format(args.session), info, (epoch - 1) * iters_per_epoch + step)
                loss_temp = 0
                start = time.time()
        # checkpoint once per epoch; unwrap .module under DataParallel
        save_name = args.save_models + args.backbone + '-' + str(epoch) + '.pth'
        save_checkpoint({
            'session': args.session,
            'epoch': epoch + 1,
            'model': fasterRCNN.module.state_dict() if args.multi_gpus else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': False,
        }, save_name)
        print('save model: {}'.format(save_name))
    if args.use_tfboard:
        logger.close()
start_idx = 1 else: start_idx = 0 new_state_dict = OrderedDict() for k, v in state_dict.items(): name = ".".join(k.split(".")[start_idx:]) new_state_dict[name] = v return new_state_dict def str2bool(v): return v.lower() in ("yes", "y", "true", "t", "1") """ For test images in a folder """ image_list, _, _ = file_utils.get_files('./data/icdar15/test_images') result_folder = './result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, args=None): t0 = time.time()
action='store_true', help='enable polygon type') parser.add_argument('--show_time', default=True, action='store_true', help='show processing time') parser.add_argument('--test_folder', default='data/', type=str, help='folder path to input images') args = parser.parse_args() """ For test images in a folder """ # image_list, _, _ = file_utils.get_files('/storage/upload_complete/') # image_list, _, _ = file_utils.get_files('/storage/prep/') image_list, _, _ = file_utils.get_files( '/dataset/crawl/front_cmtnd_resized/image/') print(len(image_list)) result_folder = '/storage/result/' os.makedirs(result_folder, exist_ok=True) shutil.rmtree(result_folder) os.makedirs(result_folder, exist_ok=True) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly): t0 = time.time() # resize img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio( image, args.canvas_size,
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type') parser.add_argument('--show_time', default=False, action='store_true', help='show processing time') parser.add_argument('--test_folder', default='./data/', type=str, help='folder path to input images') args = parser.parse_args() """ For test images in a folder """ image_list, _, _ = file_utils.get_files('./data/') result_folder = './data/result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly): t0 = time.time() # resize img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio( image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
parser.add_argument('--test_folder', default='./text_detection/test', type=str, help='folder path to input images') parser.add_argument('--refine', default=False, action='store_true', help='enable link refiner') parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model') args = parser.parse_args() """ For test images in a folder """ image_list, _, _ = file_utils.get_files(args.test_folder) result_folder = './text_detection/result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None): t0 = time.time()
def test():
    """Detect characters/words/lines on every test image with the WTD net,
    sort the nested boxes, and save crops + bbox visualisations."""
    '''INITIALIZE MODEL AND LOAD PRETRAINED MODEL'''
    myNet = WTD()
    print('Loading model from defined path :' + config.PRETRAINED_MODEL_PATH)
    if config.cuda:
        myNet.load_state_dict(
            copyStateDict(torch.load(config.PRETRAINED_MODEL_PATH)))
    else:
        myNet.load_state_dict(
            copyStateDict(
                torch.load(config.PRETRAINED_MODEL_PATH, map_location='cpu')))
    if config.cuda:
        myNet = myNet.cuda()
        myNet = torch.nn.DataParallel(myNet)
        cudnn.benchmark = False
    # per-image word-spacing counts, written out once at the end
    spacing_word = []
    myNet.eval()
    t = time.time()
    ''' SET PATH '''
    DEFAULT_PATH_LIST = [
        config.TEST_IMAGE_PATH, config.TEST_PREDICTION_PATH,
        config.CANVAS_PATH, config.MASK_PATH, config.BBOX_PATH,
        config.RESULT_CHAR_PATH, config.SPACING_WORD_PATH
    ]
    for PATH in DEFAULT_PATH_LIST:
        if not os.path.isdir(PATH):
            os.mkdir(PATH)
    ''' LIST IMAGE FILE '''
    img_list, _, _ = file_utils.get_files(config.TEST_IMAGE_PATH)
    ''' KICK OFF TEST PROCESS '''
    for i, img in enumerate(img_list):
        print("TEST IMAGE: {:d}/{:d}: {:s}".format(i + 1, len(img_list), img))
        ''' LOAD IMAGE '''
        img = imgproc.loadImage(img)
        ''' ADJUST IMAGE SIZE AND MAKE BORDER LINE FOR BETTER TESTING ACCURACY '''
        img = imgproc.adjustImageRatio(img)
        constant = imgproc.createImageBorder(img, img_size=config.target_size,
                                             color=config.white)
        index1 = file_utils.adjustImageNum(i, len(img_list))
        # separate copies so each bbox overlay draws on a clean canvas
        copy_img = constant.copy()
        copy_img2 = constant.copy()
        copy_img3 = constant.copy()
        copy_img4 = constant.copy()
        ''' PASS THE TEST MODEL AND PREDICT BELOW 4 RESULTS '''
        charBBoxes, wordBBoxes, lineBBoxes, heatmap = test_net(
            myNet, constant, config.text_threshold, config.link_threshold,
            config.low_text, config.cuda)
        file_utils.saveImage(dir=config.canvas_path, img=constant,
                             index1=index1)
        file_utils.saveMask(dir=config.mask_path, heatmap=heatmap,
                            index1=index1)
        chars_inside_line = []
        words_inside_line = []
        chars_inside_word = []
        ''' CHECK THERE IS IER BBOX IN OUTER BBOX '''
        # membership of each inner box (char/word) in each outer contour
        for a in range(len(charBBoxes)):
            chars_inside_line.append(
                wtd_utils.checkAreaInsideContour(area=charBBoxes[a],
                                                 contour=lineBBoxes))
        for b in range(len(wordBBoxes)):
            words_inside_line.append(
                wtd_utils.checkAreaInsideContour(area=wordBBoxes[b],
                                                 contour=lineBBoxes))
        for c in range(len(charBBoxes)):
            chars_inside_word.append(
                wtd_utils.checkAreaInsideContour(area=charBBoxes[c],
                                                 contour=wordBBoxes))
        '''INNER BBOX SORTING'''
        charBBoxes, lineBBoxes = wtd_utils.sortAreaInsideContour(
            target=chars_inside_line, spacing_word=None)
        wordBBoxes, lineBBoxes = wtd_utils.sortAreaInsideContour(
            target=words_inside_line, spacing_word=None)
        # with spacing_word set, sortAreaInsideContour returns a count instead
        count = wtd_utils.sortAreaInsideContour(target=chars_inside_word,
                                                spacing_word=wordBBoxes)
        spacing_word.append(count)
        tmp_charBBoxes = np.array(charBBoxes,
                                  dtype=np.float32).reshape(-1, 4, 2).copy()
        '''DRAW BBOX ON IMAGE'''
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH, img=copy_img2,
                                   index1=index1, boxes=charBBoxes,
                                   flags='char')
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH, img=copy_img3,
                                   index1=index1, boxes=wordBBoxes,
                                   flags='word')
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH, img=copy_img4,
                                   index1=index1, boxes=lineBBoxes,
                                   flags='line')
        '''MAKE FINAL CHARACTER IMAGE FOR RECOGNITION PROCESS'''
        for j, charBBox in enumerate(tmp_charBBoxes):
            index2 = file_utils.adjustImageNum(j, len(tmp_charBBoxes))
            char = imgproc.cropBBoxOnImage(copy_img, charBBox)
            orig_char = imgproc.adjustImageBorder(
                char, img_size=config.recognition_input_size,
                color=config.white)
            thresh_char = wtd_utils.thresholding(
                orig_char, img_size=config.recognition_input_size)
            file_utils.saveImage(dir=config.orig_char_path, img=orig_char,
                                 index1=index1, index2=index2)
            file_utils.saveImage(dir=config.thresh_char_path, img=thresh_char,
                                 index1=index1, index2=index2)
    ''' GENERATE TEXT FILE FOR SPACEING WORD '''
    file_utils.saveText(dir=config.blank_path, text=spacing_word,
                        index1='spacing_word')
    print("TOTAL TIME : {}s".format(time.time() - t))