Esempio n. 1
0
    def __init__(self, images_path, labels_path, image_size):
        """Collect image/label file lists and build the Gaussian generator.

        Args:
            images_path: folder scanned for training images.
            labels_path: folder scanned for the matching label files.
            image_size: target size kept for later resizing/cropping.
        """
        self.image_size = image_size
        # get_files returns several parallel lists; images and labels live at
        # different positions in that tuple.
        self.image_list, _, _, _ = file_utils.get_files(images_path)
        _, _, self.label_list, _ = file_utils.get_files(labels_path)

        # 1024 is presumably the side of the pre-rendered Gaussian template
        # used for region/affinity score maps — TODO confirm against
        # GenerateGaussian's signature.
        self.gaussian_generator = GenerateGaussian(1024, opt.gaussian_region,
                                                   opt.gaussian_affinity)
def eval2015(net,
             test_folder,
             result_folder,
             text_threshold=0.7,
             link_threshold=0.4,
             low_text=0.4):
    """Run detection over `test_folder` and evaluate ICDAR-2015 style.

    Writes per-image score-map previews under <result_folder>/mask and
    ground-truth-format detections under <result_folder>/gt, then calls
    eval_2015 on the gt folder.
    """
    image_list, _, _ = file_utils.get_files(test_folder)
    t = time.time()
    res_gt_folder = os.path.join(result_folder, 'gt')
    res_mask_folder = os.path.join(result_folder, 'mask')
    # cv2.imwrite fails silently when the target directory is missing, so
    # make sure both output folders exist up front.
    os.makedirs(res_gt_folder, exist_ok=True)
    os.makedirs(res_mask_folder, exist_ok=True)
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path),
              end='\n')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text = test_net(net, image, text_threshold,
                                             link_threshold, low_text, True,
                                             False, 2240, 1.5, False)

        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        # BUG FIX: the second argument previously began with '/', which makes
        # os.path.join discard res_mask_folder entirely and target the
        # filesystem root.
        mask_file = os.path.join(res_mask_folder,
                                 "res_" + filename + '_mask.jpg')
        cv2.imwrite(mask_file, score_text)

        file_utils.saveResult15(image_path, polys, dirname=res_gt_folder)

    eval_2015(os.path.join(result_folder, 'gt'))
    print("elapsed time : {}s".format(time.time() - t))
Esempio n. 3
0
def PredictDetection(args, net, image_path, opt, reco):
    """Detect text in a single image and save the visualised result.

    Args:
        args: namespace carrying thresholds (`text_threshold`,
            `link_threshold`, `low_text`), `cuda` and `test_folder`.
        net: detection network passed through to test_net.
        image_path: path of the one image to process.
        opt, reco: forwarded untouched to file_utils.saveResult.

    Returns:
        Whatever file_utils.saveResult returns (`fl`).
    """
    # NOTE(review): image_list is computed but never used — the folder-wide
    # loop was disabled in favour of the single `image_path` argument. The
    # call is kept since removing it could change failure behaviour on a
    # bad test_folder; confirm before deleting.
    image_list, _, _ = file_utils.get_files(args.test_folder)

    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)

    t = time.time()
    image = imgproc.loadImage(image_path)
    # score_text (the region score map) is not used here; only the boxes are.
    bboxes, score_text = test_net(net, image, args.text_threshold,
                                  args.link_threshold, args.low_text,
                                  args.cuda, args)

    # image[:, :, ::-1] flips the channel order (RGB <-> BGR) for saving.
    fl = file_utils.saveResult(image_path, image[:, :, ::-1], bboxes, opt,
                               reco, dirname=result_folder)

    print("elapsed time detecting : {}s".format(time.time() - t))
    log.info(f'elapsed time detecting : {time.time() - t}s')
    return fl
Esempio n. 4
0
def test_net(model=None,
             mapper=None,
             spaces=None,
             load_from=None,
             save_to=None):
    """Classify one character per image under `load_from` and emit results.

    Each file name's leading token (before the first '_') identifies the
    source image; predictions are mapped back to characters via `mapper`
    and written out by ltr_utils.display_stdout in 'file' mode.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        img_lists, _, _, name_list = file_utils.get_files(load_from)
        # Source-image identifier for every crop file.
        image_name_nums = [name.split('_')[0] for name in name_list]
        res = []
        for in_path in img_lists:
            # Pre-process: load -> grayscale -> tensor at training size,
            # then add the batch dimension and move to the chosen device.
            batch = imgproc.tranformToTensor(
                imgproc.cvtColorGray(imgproc.loadImage(in_path)),
                opt.RECOG_TRAIN_SIZE).unsqueeze(0).to(device)
            scores = model(batch)
            _, pred = torch.max(scores.data, 1)
            res.append(mapper[0][pred])
        # method for saving result, MODE: file | stdout | all
        ltr_utils.display_stdout(chars=res,
                                 space=spaces,
                                 img_name=image_name_nums,
                                 MODE='file',
                                 save_to=save_to)
Esempio n. 5
0
def infer_detection(impath,net,refine_net,args):
  """Run CRAFT detection over every image under `impath`.

  Writes ./Results/data.json mapping each image's path (relative to
  `impath`) to its detected boxes and per-box detector confidences. All
  values are stringified so the structure is directly JSON-serialisable.
  """
  image_list, _, _ = file_utils.get_files(impath)

  start = impath  # root used to key images by their relative path
  data = {}

  t = time.time()

  for k, image_path in enumerate(image_list):
    print("Test image {:d}/{:d}: {:s}".format(k+1, len(image_list), image_path), end='\r')
    image = imgproc.loadImage(image_path)

    image_name = os.path.relpath(image_path, start)

    bboxes, polys, score_text, det_scores = test.test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, args, refine_net)
    bbox_score = {}
    # enumerate replaces the original hand-maintained `index` counter.
    for index, (box, conf) in enumerate(zip(bboxes, det_scores)):
      bbox_score[str(index)] = {
          'detconf': str(conf),
          'box': [[str(coors[0]), str(coors[1])] for coors in box],
      }
    data[image_name] = bbox_score

  if not os.path.isdir('./Results'):
    os.mkdir('./Results')
  # `with` closes the file on exit; the explicit close() in the original
  # was redundant.
  with open('./Results/data.json', 'w') as jsonfile:
    json.dump(data, jsonfile)
  print("elapsed time : {}s".format(time.time() - t))
Esempio n. 6
0
def main(args, logger=None):
    """Load a CRAFT checkpoint, detect text in every image under
    args.img_path, save score-map previews and per-image detections, then
    run eval_dataset against the ground truth."""
    net = CRAFT(pretrained=False)  # initialize

    print('Loading weights from checkpoint {}'.format(args.model_path))
    # Load the checkpoint once; map to CPU when CUDA is unavailable.
    if args.cuda:
        state = torch.load(args.model_path)
    else:
        state = torch.load(args.model_path, map_location='cpu')
    net.load_state_dict(copyStateDict(state))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    net.eval()

    t = time.time()

    """ For test images in a folder """
    image_list, _, _ = file_utils.get_files(args.img_path)
    est_folder = os.path.join(args.rst_path, 'est')
    mask_folder = os.path.join(args.rst_path, 'mask')
    eval_folder = os.path.join(args.rst_path, 'eval')
    # Create the three result sub-folders if missing.
    for folder in (est_folder, mask_folder, eval_folder):
        cg.folder_exists(folder, create_=True)

    for idx, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(idx + 1, len(image_list),
                                                  image_path))
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(
            net,
            image,
            text_threshold=args.text_threshold,
            link_threshold=args.link_threshold,
            low_text=args.low_text,
            cuda=args.cuda,
            canvas_size=args.canvas_size,
            mag_ratio=args.mag_ratio,
            poly=args.poly,
            show_time=args.show_time)

        # Save the score-map preview unless one already exists.
        stem, _ext = os.path.splitext(os.path.basename(image_path))
        mask_file = mask_folder + "/res_" + stem + '_mask.jpg'
        if not (cg.file_exists(mask_file)):
            cv2.imwrite(mask_file, score_text)

        file_utils.saveResult15(image_path,
                                bboxes,
                                dirname=est_folder,
                                mode='test')

    eval_dataset(est_folder=est_folder,
                 gt_folder=args.gt_path,
                 eval_folder=eval_folder,
                 dataset_type=args.dataset_type)
    print("elapsed time : {}s".format(time.time() - t))
Esempio n. 7
0
def test():
    """Run a pretrained speech-bubble faster-RCNN over every test image and
    save cut/bubble/text crops plus a visualised result per image."""

    np.random.seed(config.RNG_SEED)
    # Two-class problem: background vs. speech bubble.
    labels = np.asarray(['__background__', 'speech'])
    '''INITIALIZE MODEL AND LOAD PRETRAINED MODEL'''
    # ResNet depth defaults to 101; 'res152' selects the deeper backbone.
    layerNum = 101
    if config.BACKBONE == 'res152': layerNum = 152

    fasterRCNN = resnet(labels,
                        layerNum,
                        pretrained=False,
                        class_agnostic=False)
    fasterRCNN.create_architecture()

    print('Loading model from defined path :' + config.PRETRAINED_MODEL_PATH)

    if config.cuda:
        model = torch.load(config.PRETRAINED_MODEL_PATH)
    else:
        # Keep tensors on CPU when CUDA is unavailable.
        model = torch.load(config.PRETRAINED_MODEL_PATH,
                           map_location=(lambda storage, loc: storage))

    fasterRCNN.load_state_dict(model['model'])
    # Restore the pooling mode the checkpoint was trained with, if recorded.
    if 'pooling_mode' in model.keys(): cfg.POOLING_MODE = model['pooling_mode']

    if config.cuda:
        fasterRCNN.cuda()
        fasterRCNN = torch.nn.DataParallel(fasterRCNN)

    fasterRCNN.eval()
    t = time.time()

    # Placeholder tensors resized in-place by the detection pipeline.
    # NOTE(review): these call .cuda() unconditionally — this path appears to
    # require a GPU even when config.cuda is False; confirm.
    with torch.no_grad():
        im_data = Variable(torch.FloatTensor(1).cuda())
        im_info = Variable(torch.FloatTensor(1).cuda())
        num_boxes = Variable(torch.LongTensor(1).cuda())
        gt_boxes = Variable(torch.FloatTensor(1).cuda())
        items = [im_data, im_info, num_boxes, gt_boxes]
    ''' LIST IMAGE FILE '''
    img_list, _, _ = file_utils.get_files(config.TEST_IMAGE_PATH)
    ''' KICK OFF TEST PROCESS '''
    for i, img in enumerate(img_list):
        sys.stdout.write('TEST IMAGES: {:d}/{:d}: {:s} \r'.format(
            i + 1, len(img_list), img))
        sys.stdout.flush()
        ''' LOAD IMAGE '''
        img = imgproc.loadImage(img)
        img_blob, img_scales = imgproc.getImageBlob(img)
        ''' PASS THE TEST MODEL AND PREDICT BELOW IM RESULTS '''
        alpha_img, vis_img, cuts, bubbles, texts = test_net(
            fasterRCNN, img, img_blob, img_scales, items, labels, i)
        # Zero-pad the index so result files sort lexicographically.
        fixed_i = file_utils.resultNameNumbering(origin=i, digit=len(img_list))

        # Persist every detected cut, bubble and text crop individually.
        for cut_idx, cut in enumerate(cuts):
            file_utils.saveImage(dir=config.CUT_PATH,
                                 img=cut,
                                 index1=fixed_i,
                                 index2=cut_idx,
                                 ext='.png')
        for bub_idx, bubble in enumerate(bubbles):
            file_utils.saveImage(dir=config.BUBBLE_PATH,
                                 img=bubble,
                                 index1=fixed_i,
                                 index2=bub_idx,
                                 ext='.png')
        for txt_idx, txt in enumerate(texts):
            file_utils.saveImage(dir=config.TEXT_PATH,
                                 img=txt,
                                 index1=fixed_i,
                                 index2=txt_idx,
                                 ext='.png')

        file_utils.saveImage(dir=config.FINAL_IMAGE_PATH,
                             img=vis_img,
                             index1=fixed_i,
                             ext='.jpg')

    print("TOTAL TIME : {}s".format(time.time() - t))
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict

def str2bool(v):
    """Interpret common affirmative strings as True (case-insensitive)."""
    truthy = ("yes", "y", "true", "t", "1")
    return v.lower() in truthy



""" For test images in a folder """
image_list, _, _ = file_utils.get_files('test/pics')  #! modified path

result_folder = './result_bit' #!modified path
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)

def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
Esempio n. 9
0
# CLI options for the detector; `parser` is created earlier in the file.
parser.add_argument('--test_folder',
                    default='/data/',
                    type=str,
                    help='folder path to input images')
parser.add_argument('--refine',
                    default=False,
                    action='store_true',
                    help='enable link refiner')
parser.add_argument('--refiner_model',
                    default='weights/craft_refiner_CTW1500.pth',
                    type=str,
                    help='pretrained refiner model')

args = parser.parse_args()
""" For test images in a folder """
# NOTE(review): args.input_folder / args.output_folder / args.debug are not
# declared by the add_argument calls visible here — presumably registered
# earlier on the same parser; verify.
image_list, _, _ = file_utils.get_files(args.input_folder)

os.makedirs(args.output_folder, exist_ok=True)
if args.debug:
    os.makedirs(os.path.join(args.output_folder, 'debug'), exist_ok=True)


def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    t0 = time.time()
Esempio n. 10
0
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Module-level setup: load the CRAFT detector, point pytesseract at the
# tesseract binary, and collect input image paths.
# NOTE(review): the '\\' path separators make this Windows-only.

net = CRAFT()  # Initialize net
net.load_state_dict(
    craft_utils.copyStateDict(
        torch.load(os.getcwd() + '\\craft_mlt_25k.pth',
                   map_location='cpu')))  # load pretrained weights
net.eval()

print("The required neural network has been successfully loaded...")

# Set tessaract path (`td` is defined elsewhere in this file)
pytesseract.pytesseract.tesseract_cmd = td

# Load paths for all images in Input folder (`wd` is the working directory)
image_list, _, _ = file_utils.get_files(wd + '\\Input')

print("All file names has been obtained...")

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ++++++++++++++++++++ Function section +++++++++++++++++++
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Find coordinates of boxes where is a text
def get_boxes(img_c):
    # Resize
    img_r, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        img_c,
        square_size=square_size,
        interpolation=cv2.INTER_LINEAR,
Esempio n. 11
0
# Inference settings replacing the usual argparse flags.
CONFIG = {
    'trained_model': 'weights/craft_mlt_25k.pth',
    'text_threshold': 0.7,       # region score cut-off for text pixels
    'low_text': 0.4,             # low-bound text score
    'link_threshold': 0.4,       # affinity score cut-off linking characters
    'cuda': False,
    'canvas_size': 1280,         # max side for inference resize
    'mag_ratio': 1.5,            # image magnification ratio
    'poly': False,               # output polygons instead of boxes
    'show_time': False,
    'test_folder': 'data/',
    'refine': False,             # enable the link-refiner network
    'refiner_model': 'weights/craft_refiner_CTW1500.pth'
}
""" For test images in a folder """
image_list, _, _ = file_utils.get_files(CONFIG['test_folder'])

result_folder = './result/'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)


def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    t0 = time.time()
Esempio n. 12
0
def main(source, target):
    """Detect text in every image under `source` and write the score-map
    previews and visualised detections into `target`.

    Relies on the module-level `args` namespace for the checkpoint path,
    thresholds and CUDA/refiner flags.
    """
    #changing variables to our source and target
    image_list, _, _ = file_utils.get_files(source)

    result_folder = target
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)

    # load net
    net = CRAFT()  # initialize

    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))
    else:
        # Map tensors to CPU when CUDA is unavailable.
        net.load_state_dict(
            copyStateDict(torch.load(args.trained_model, map_location='cpu')))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    net.eval()

    # LinkRefiner: optional second network that refines character links.
    refine_net = None
    if args.refine:
        from refinenet import RefineNet
        refine_net = RefineNet()
        print('Loading weights of refiner from checkpoint (' +
              args.refiner_model + ')')
        if args.cuda:
            refine_net.load_state_dict(
                copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(
                copyStateDict(
                    torch.load(args.refiner_model, map_location='cpu')))

        refine_net.eval()
        # The refiner produces polygon output, so force poly mode on.
        args.poly = True

    t = time.time()

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path),
              end='\r')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold,
                                             args.low_text, args.cuda,
                                             args.poly, refine_net)

        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)

        # image[:, :, ::-1] flips channel order (RGB <-> BGR) for saving.
        file_utils.saveResult(image_path,
                              image[:, :, ::-1],
                              polys,
                              dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - t))
Esempio n. 13
0
import file_utils
import os
import tensorflow as tf
import time
import cv2
import numpy as np

import imgproc
import craft_utils
from craft_net import CRAFT

# Module-level setup: output folder, test image lists and resize constants.
result_folder = './synth_result/'

""" For test images in a folder """
image_list_ic15, _, _ = file_utils.get_files('./eval_data_ic15/')
image_list_ours, _, _ = file_utils.get_files('./choice/')

if not os.path.isdir(result_folder):
    os.mkdir(result_folder)

# The int()/float() wrappers around literal constants were redundant no-ops.
canvas_size = 2240   # max side for inference resize
mag_ratio = 2.0      # image magnification ratio


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,filename,result_folder=result_folder):
    t0 = time.time()
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
Esempio n. 14
0
def show_image(data_dir, class_id, image_id):
    """Display image number `image_id` of class `class_id` under `data_dir`."""
    name = get_files(data_dir, class_id)[image_id]
    path = get_file(data_dir, class_id, name)
    io.imshow(io.imread(path))
Esempio n. 15
0
 def __init__(self, test_folder, canvas_size, mag_ratio):
     """Collect test image paths and remember the resize parameters.

     Args:
         test_folder: folder scanned for input images.
         canvas_size: max side length used when resizing for inference.
         mag_ratio: image magnification ratio.
     """
     self.image_list, _, _ = file_utils.get_files(test_folder)
     self.canvas_size = canvas_size
     self.mag_ratio = mag_ratio
Esempio n. 16
0
def createDataset(args):
    """Render a character-recognition training set from TTF fonts.

    For every label character and every font, draws the glyph, applies a
    morphological variation (erode/dilate), optionally adds salt-and-pepper
    and chunk noise, and writes each image plus a CSV label line. When
    args.webtoon_data is set, hard real-world webtoon samples are appended.
    """

    # Start from a clean training-image directory.
    file_utils.rm_all_dir(dir=opt.RECOGNITION_TRAIN_IMAGE_PATH)
    file_utils.mkdir(dir=[opt.RECOGNITION_TRAIN_IMAGE_PATH])

    # Label list; strip the BOM a UTF-8 file may carry.
    with codecs.open('./labels-2213.txt', 'r', encoding='utf-8') as f:
        labels = f.read().strip('\ufeff').splitlines()

    FONTS_PATH = opt.RECOGNITIOON_FONT_PATH
    CSV_PATH = opt.RECOGNITION_CSV_PATH
    IMAGE_PATH = opt.RECOGNITION_TRAIN_IMAGE_PATH

    fonts = glob.glob(os.path.join(FONTS_PATH, '*.ttf'))
    labels_csv = codecs.open(os.path.join(CSV_PATH), 'w', encoding='utf-8')

    print("[THE NUMBER OF FONTS : {}]".format(len(fonts)))

    cnt = 0
    prev_cnt = 0
    # Expected total, doubled once per enabled noise augmentation.
    total = opt.NUM_CLASSES * len(fonts) * opt.MORPH_NUM
    if args.salt_pepper: total *= 2
    if args.chunk_noise: total *= 2

    for k, character in enumerate(labels):

        # Progress line roughly every 5000 generated images.
        if cnt - prev_cnt > 5000:
            prev_cnt = cnt
            sys.stdout.write(
                'TRAINING IMAGE GENERATION: ({}/{}) \r'.format(cnt, total))
            sys.stdout.flush()

        for f in fonts:

            for v in range(opt.MORPH_NUM):

                image, drawing = make_canvas(width=opt.RECOG_IMAGE_WIDTH, height=opt.RECOG_IMAGE_HEIGHT,
                                            color=opt.RECOG_BACKGROUND)
                font_type = determine_font_size(font=f, size=opt.RECOG_FONT_SIZE)
                w, h = determine_canvas_size(canvas=drawing, label=character, font=font_type)
                make_letter(canvas=drawing, label=character, width=w, height=h, color=opt.RECOG_FONT_COLOR, font=font_type)

                morph_templete = np.array(image.copy())
                kernel = np.ones((2, 2), np.uint8)

                # Morphological variation: erode on v==1, dilate otherwise.
                if v == 1: morph_templete = cv2.erode(morph_templete, kernel, iterations=1)
                else: morph_templete = cv2.dilate(morph_templete, kernel, iterations=1)

                copy = morph_templete.copy()
                cnt += 1

                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=np.array(copy), index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')

                if args.salt_pepper:
                    # NOTE(review): increments by 11 while every sibling path
                    # uses += 1 — this leaves index gaps and skews the
                    # progress total; looks like a typo, verify.
                    cnt += 11
                    copy = generate_salt_and_pepper_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k,
                                       ext='.png')
                if args.chunk_noise:
                    copy = generate_chunk_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k,
                                       ext='.png')

    #  added custom training data difficult to classify from webtoon

    if args.webtoon_data:
        tranfer_img_list, _, _, _ = file_utils.get_files(opt.RECOG_WEBTOON_TRAIN_DATA_PATH)
        label_mapper = file_utils.makeLabelMapper('./labels-2213.txt')
        test_txt = []; test_num = []
        print("[CUSTOM HANGUL DIFFICULT DATASET GENERATION : {}]".format(len(tranfer_img_list)))
        text_labels = file_utils.loadText(opt.RECOG_WEBTOON_TRAIN_LABEL_PATH)
        # Map each webtoon label character to its class index.
        for txt in text_labels[0]:
            test_num.append(label_mapper[0].tolist().index(txt))
            test_txt.append(txt)

        for idx, in_path in enumerate(tranfer_img_list):
            k, character = test_num[idx], test_txt[idx]
            img = imgproc.loadImage(in_path)
            img = imgproc.cvtColorGray(img)
            # NOTE(review): range(1) runs exactly once — vestigial loop.
            for x in range(1):
                copy = img.copy()
                cnt += 1

                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')

    labels_csv.close()

def resume(track_paths):
    """Return the subset of `track_paths` whose spectrogram does not exist yet.

    A track counts as completed when its basename (without extension) appears
    in the module-level `spectrograms_directory` listing.
    """
    completed_track_paths = utils.get_file_names(spectrograms_directory)

    print('Previously completed spectrograms: ' +
          str(len(completed_track_paths)))

    # Set membership is O(1); the original scanned the list once per track.
    completed = set(completed_track_paths)
    return [
        path for path in track_paths
        if os.path.splitext(os.path.basename(path))[0] not in completed
    ]


if __name__ == '__main__':
    # Entry point: list music files, drop already-processed ones, then
    # generate the remaining spectrograms across all CPU cores.
    # NOTE(review): the hard-coded drive letter makes this Windows-only.
    print('Getting music file paths...')
    track_paths = utils.get_files("Q:\\fma_full")
    print('Music files identified: ' + str(len(track_paths)))

    print('Checking previously created spectrograms...')
    track_paths = resume(track_paths)
    print('Number of spectrograms remaining to create: ' +
          str(len(track_paths)))

    pool = Pool(os.cpu_count())
    pool.map(generate_spectrogram, track_paths)
    # terminate() stops workers immediately; map() has already returned, so
    # all work is done — close()+join() would be the gentler equivalent.
    pool.terminate()
Esempio n. 18
0
# CLI options for the demo; `parser` is created earlier in the file.
parser.add_argument('--ratio',
                    default=2.0,
                    type=float,
                    help='height & width ratio of demo image')
parser.add_argument('--demo_folder',
                    default='./data/',
                    type=str,
                    help='folder path to demo images')
parser.add_argument('--cuda',
                    action='store_true',
                    default=True,
                    help='use cuda for inference')

args = parser.parse_args()
""" For test images in a folder """
image_list, _, _, name_list = file_utils.get_files(args.demo_folder)

file_utils.rm_all_dir(dir='./result/')  # clean directories for next test
file_utils.mkdir(dir=[
    './result/', './result/bubbles/', './result/cuts/', './result/demo/',
    './result/chars/'
])

# load net
models = net_utils.load_net(args)  # initialize and load weights

spaces = []  # text recognition spacing word
text_warp_items = []  # text to warp bubble image
demos = []  # all demo image storage
t = time.time()
Esempio n. 19
0
# Optionally load the LinkRefiner network on top of the base detector.
# NOTE(review): this block mixes bare globals (refine, cuda, refiner_model)
# with `args.poly` — presumably both exist at module level; verify.
if refine:
    from refinenet import RefineNet
    refine_net = RefineNet()
    print('Loading weights of refiner from checkpoint (' + refiner_model + ')')
    if cuda:
        refine_net.load_state_dict(copyStateDict(torch.load(refiner_model)))
        refine_net = refine_net.cuda()
        refine_net = torch.nn.DataParallel(refine_net)
    else:
        # Map tensors to CPU when CUDA is unavailable.
        refine_net.load_state_dict(
            copyStateDict(torch.load(refiner_model, map_location='cpu')))

    refine_net.eval()
    # The refiner produces polygon output, so force poly mode on.
    args.poly = True
""" For test images in a folder """
image_list, _, _ = file_utils.get_files("data/")

def detect(image_path):
    """Detect text in `image_path`, writing results to a fresh
    timestamp-named folder.

    Returns:
        The folder path the results were written under.
    """
    result_folder = "result/" + str(time.time()) + "/"
    # makedirs also creates the missing "result/" parent; the original
    # os.mkdir raised FileNotFoundError on a fresh checkout.
    os.makedirs(result_folder, exist_ok=True)
    image = imgproc.loadImage(image_path)
    bboxes, polys, score_text = test_net(result_folder, net, image,
                                         text_threshold, link_threshold,
                                         low_text, cuda, args.poly, refine_net)
    return result_folder


detect("data/plat1.png")
# CLI options for the detector; `parser` is created earlier in the file.
parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained model')
parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
parser.add_argument('--canvas_size', default=2240, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=2, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
parser.add_argument('--test_folder', default='/data/', type=str, help='folder path to input images')

args = parser.parse_args()


""" For test images in a folder """
# NOTE(review): the folder is hard-coded; args.test_folder above is ignored.
image_list, _, _ = file_utils.get_files('/data/CRAFT-pytorch/test')

result_folder = '/data/CRAFT-pytorch/result/'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)

def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
Esempio n. 21
0
def train(args):
    """Train the speech-bubble faster-RCNN on a VOC-2007-style dataset.

    Builds the roidb from ./train/images/, constructs the ResNet-101
    backbone, then runs the standard RPN + RCNN loss loop, periodically
    logging losses (optionally to tensorboard) and checkpointing per epoch.
    """
    file_utils.rm_all_dir(dir='./train/cache/')  # clean cache
    dataset_name = "voc_2007_trainval"
    args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
    args.cfg_file = "cfgs/{}_ls.yml".format(args.backbone) if args.large_scale else "cfgs/{}.yml".format(args.backbone)

    if args.cfg_file is not None: cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None: cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = opt.BUBBLE_TRAIN_FLIP
    cfg.USE_GPU_NMS = opt.cuda

    # Build the train index file from the image folder, then the roidb.
    _, _, _, name_lists = file_utils.get_files('./train/images/')
    file_utils.makeTrainIndex(names=name_lists, save_to='./train/trainval.txt')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(dataset_name)
    train_size = len(roidb)

    print('TRAIN IMAGE NUM: {:d}'.format(len(roidb)))

    file_utils.mkdir(dir=[args.save_models])

    sampler_batch = sampler(train_size, args.batch)

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch,\
                             imdb.num_classes, training=True)

    dataloader = DataLoader(dataset, batch_size=args.batch,
                            sampler=sampler_batch, num_workers=args.num_workers)

    # Placeholder tensors resized in-place from each loaded batch below.
    im_data = Variable(torch.FloatTensor(1).cuda())
    im_info = Variable(torch.FloatTensor(1).cuda())
    num_boxes = Variable(torch.LongTensor(1).cuda())
    gt_boxes = Variable(torch.FloatTensor(1).cuda())

    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()

    lr = args.lr

    # Per-parameter options: biases may get doubled lr and no weight decay,
    # per the standard Caffe-style faster-RCNN configuration.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),\
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    if opt.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    # Optionally resume from a prior checkpoint (model, optimizer, lr, epoch).
    if args.resume:
        load_name = os.path.join(args.save_models,
                                 'Speech-Bubble-Detector-{}-{}-{}.pth'.format(args.backbone, args.resume_epoch, args.resume_batch))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

    if args.multi_gpus:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(train_size / args.batch)

    if args.use_tfboard:
        from tensorboardX import SummaryWriter

        logger = SummaryWriter("logs")

    args.max_epochs = args.epoch
    for epoch in range(1, args.epoch + 1):

        fasterRCNN.train()
        loss_temp = 0
        start = time.time()

        # Step-decay the learning rate on schedule.
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            # Copy the batch into the pre-allocated CUDA tensors.
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            # Total loss = RPN (cls + box) + RCNN head (cls + box).
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.item()

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.backbone == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            # Periodic console / tensorboard reporting.
            if step % args.display_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= (args.display_interval + 1)

                # With DataParallel the losses arrive per-GPU; reduce first.
                if args.multi_gpus:
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[epoch %d][iter %d/%d] loss: %.4f, lr: %.2e" \
                      % (epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    logger.add_scalars("logs_s_{}/losses".format(args.session), info,
                                       (epoch - 1) * iters_per_epoch + step)

                loss_temp = 0
                start = time.time()

        # Checkpoint once per epoch (unwrap the DataParallel module if used).
        save_name = args.save_models + args.backbone + '-' + str(epoch) + '.pth'
        save_checkpoint({
            'session': args.session,
            'epoch': epoch + 1,
            'model': fasterRCNN.module.state_dict() if args.multi_gpus else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': False,
        }, save_name)
        print('save model: {}'.format(save_name))

    if args.use_tfboard:
        logger.close()
Esempio n. 22
0
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict


def str2bool(v):
    """Return True when *v* spells an affirmative value (case-insensitive)."""
    affirmative = {"yes", "y", "true", "t", "1"}
    return v.lower() in affirmative


""" For test images in a folder """
image_list, _, _ = file_utils.get_files('./data/icdar15/test_images')

result_folder = './result/'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)


def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             args=None):
    t0 = time.time()
Esempio n. 23
0
                    action='store_true',
                    help='enable polygon type')
# NOTE(review): default=True combined with action='store_true' means this
# flag is always truthy and passing --show_time has no effect -- confirm
# whether default=False was intended before changing behavior.
parser.add_argument('--show_time',
                    default=True,
                    action='store_true',
                    help='show processing time')
parser.add_argument('--test_folder',
                    default='data/',
                    type=str,
                    help='folder path to input images')

args = parser.parse_args()

""" For test images in a folder """
image_list, _, _ = file_utils.get_files(
    '/dataset/crawl/front_cmtnd_resized/image/')
print(len(image_list))

# Reset the result folder to an empty directory.  ignore_errors covers the
# not-yet-existing case, replacing the original
# makedirs -> rmtree -> makedirs sequence with two race-free calls.
result_folder = '/storage/result/'
shutil.rmtree(result_folder, ignore_errors=True)
os.makedirs(result_folder, exist_ok=True)


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
Esempio n. 24
0
parser.add_argument('--poly',
                    default=False,
                    action='store_true',
                    help='enable polygon type')
parser.add_argument('--show_time',
                    default=False,
                    action='store_true',
                    help='show processing time')
parser.add_argument('--test_folder',
                    default='./data/',
                    type=str,
                    help='folder path to input images')

args = parser.parse_args()

""" For test images in a folder """
image_list, _, _ = file_utils.get_files('./data/')

# makedirs with exist_ok avoids the isdir-check/mkdir TOCTOU race and also
# creates missing parent directories if ./data/ does not exist yet.
result_folder = './data/result/'
os.makedirs(result_folder, exist_ok=True)


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
Esempio n. 25
0
parser.add_argument('--test_folder',
                    default='./text_detection/test',
                    type=str,
                    help='folder path to input images')
parser.add_argument('--refine',
                    default=False,
                    action='store_true',
                    help='enable link refiner')
parser.add_argument('--refiner_model',
                    default='weights/craft_refiner_CTW1500.pth',
                    type=str,
                    help='pretrained refiner model')

args = parser.parse_args()

""" For test images in a folder """
image_list, _, _ = file_utils.get_files(args.test_folder)

# makedirs with exist_ok avoids the isdir-check/mkdir TOCTOU race and also
# creates the ./text_detection parent directory if it is missing.
result_folder = './text_detection/result/'
os.makedirs(result_folder, exist_ok=True)


def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    t0 = time.time()
Esempio n. 26
0
def test():
    """Run text detection on every image in the configured test folder.

    Loads the pretrained WTD model, predicts character/word/line bounding
    boxes per image, saves the canvas/heatmap/bbox visualizations, crops
    each detected character for the downstream recognition step, and
    finally writes the per-word character counts ("spacing") to disk.
    """
    '''INITIALIZE MODEL AND LOAD PRETRAINED MODEL'''
    myNet = WTD()
    print('Loading model from defined path :' + config.PRETRAINED_MODEL_PATH)
    if config.cuda:
        myNet.load_state_dict(
            copyStateDict(torch.load(config.PRETRAINED_MODEL_PATH)))
    else:
        # CPU-only load: remap tensors that were saved from GPU onto the CPU.
        myNet.load_state_dict(
            copyStateDict(
                torch.load(config.PRETRAINED_MODEL_PATH, map_location='cpu')))

    if config.cuda:
        myNet = myNet.cuda()
        myNet = torch.nn.DataParallel(myNet)
        cudnn.benchmark = False

    # Accumulates, per image, the character counts per word (used at the end
    # to emit the spacing_word text file).
    spacing_word = []
    myNet.eval()
    t = time.time()
    ''' SET PATH '''
    DEFAULT_PATH_LIST = [
        config.TEST_IMAGE_PATH, config.TEST_PREDICTION_PATH,
        config.CANVAS_PATH, config.MASK_PATH, config.BBOX_PATH,
        config.RESULT_CHAR_PATH, config.SPACING_WORD_PATH
    ]
    for PATH in DEFAULT_PATH_LIST:
        if not os.path.isdir(PATH): os.mkdir(PATH)
    ''' LIST IMAGE FILE '''
    img_list, _, _ = file_utils.get_files(config.TEST_IMAGE_PATH)
    ''' KICK OFF TEST PROCESS '''
    for i, img in enumerate(img_list):

        print("TEST IMAGE: {:d}/{:d}: {:s}".format(i + 1, len(img_list), img))
        ''' LOAD IMAGE '''
        # `img` is rebound from path string to the loaded image array here.
        img = imgproc.loadImage(img)
        ''' ADJUST IMAGE SIZE AND MAKE BORDER LINE FOR BETTER TESTING ACCURACY '''
        img = imgproc.adjustImageRatio(img)
        constant = imgproc.createImageBorder(img,
                                             img_size=config.target_size,
                                             color=config.white)

        # Zero-padded index string for stable output file naming.
        index1 = file_utils.adjustImageNum(i, len(img_list))

        # Independent copies: each visualization below draws on its own copy
        # so the boxes do not overlap on a single canvas.
        copy_img = constant.copy()
        copy_img2 = constant.copy()
        copy_img3 = constant.copy()
        copy_img4 = constant.copy()
        ''' PASS THE TEST MODEL AND PREDICT BELOW 4 RESULTS '''
        charBBoxes, wordBBoxes, lineBBoxes, heatmap = test_net(
            myNet, constant, config.text_threshold, config.link_threshold,
            config.low_text, config.cuda)

        # NOTE(review): config exposes both upper-case (CANVAS_PATH) and
        # lower-case (canvas_path, mask_path) attributes -- confirm they point
        # to the same directories.
        file_utils.saveImage(dir=config.canvas_path,
                             img=constant,
                             index1=index1)
        file_utils.saveMask(dir=config.mask_path,
                            heatmap=heatmap,
                            index1=index1)

        chars_inside_line = []
        words_inside_line = []
        chars_inside_word = []
        ''' CHECK THERE IS INER BBOX IN OUTER BBOX '''
        # Group inner boxes (chars/words) under each outer contour
        # (lines/words).
        for a in range(len(charBBoxes)):
            chars_inside_line.append(
                wtd_utils.checkAreaInsideContour(area=charBBoxes[a],
                                                 contour=lineBBoxes))
        for b in range(len(wordBBoxes)):
            words_inside_line.append(
                wtd_utils.checkAreaInsideContour(area=wordBBoxes[b],
                                                 contour=lineBBoxes))
        for c in range(len(charBBoxes)):
            chars_inside_word.append(
                wtd_utils.checkAreaInsideContour(area=charBBoxes[c],
                                                 contour=wordBBoxes))
        '''INNER BBOX SORTING'''
        # NOTE(review): the second call overwrites lineBBoxes from the first
        # call, and the third call returns a single value unlike the others --
        # verify against wtd_utils.sortAreaInsideContour's contract.
        charBBoxes, lineBBoxes = wtd_utils.sortAreaInsideContour(
            target=chars_inside_line, spacing_word=None)
        wordBBoxes, lineBBoxes = wtd_utils.sortAreaInsideContour(
            target=words_inside_line, spacing_word=None)
        count = wtd_utils.sortAreaInsideContour(target=chars_inside_word,
                                                spacing_word=wordBBoxes)
        spacing_word.append(count)

        # Normalize char boxes to an (N, 4, 2) float32 array of quadrilateral
        # corner points for cropping below.
        tmp_charBBoxes = np.array(charBBoxes,
                                  dtype=np.float32).reshape(-1, 4, 2).copy()
        '''DRAW BBOX ON IMAGE'''
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH,
                                   img=copy_img2,
                                   index1=index1,
                                   boxes=charBBoxes,
                                   flags='char')
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH,
                                   img=copy_img3,
                                   index1=index1,
                                   boxes=wordBBoxes,
                                   flags='word')
        file_utils.drawBBoxOnImage(dir=config.BBOX_PATH,
                                   img=copy_img4,
                                   index1=index1,
                                   boxes=lineBBoxes,
                                   flags='line')
        '''MAKE FINAL CHARACTER IMAGE FOR RECOGNITION PROCESS'''
        # Crop each character from the untouched copy, pad it to the
        # recognition input size, and save both the original and a
        # thresholded (binarized) version.
        for j, charBBox in enumerate(tmp_charBBoxes):
            index2 = file_utils.adjustImageNum(j, len(tmp_charBBoxes))
            char = imgproc.cropBBoxOnImage(copy_img, charBBox)
            orig_char = imgproc.adjustImageBorder(
                char,
                img_size=config.recognition_input_size,
                color=config.white)
            thresh_char = wtd_utils.thresholding(
                orig_char, img_size=config.recognition_input_size)
            file_utils.saveImage(dir=config.orig_char_path,
                                 img=orig_char,
                                 index1=index1,
                                 index2=index2)
            file_utils.saveImage(dir=config.thresh_char_path,
                                 img=thresh_char,
                                 index1=index1,
                                 index2=index2)
    ''' GENERATE TEXT FILE FOR SPACEING WORD '''
    # NOTE(review): writes to config.blank_path although SPACING_WORD_PATH was
    # the directory created above -- confirm these refer to the same location.
    file_utils.saveText(dir=config.blank_path,
                        text=spacing_word,
                        index1='spacing_word')

    print("TOTAL TIME : {}s".format(time.time() - t))