Esempio n. 1
0
def saveResult15(img_file, boxes, dirname='./result/', verticals=None, texts=None, save_img_=False, mode='train'):
    """Save the text detection result of one image to a text file.

    The result is written to ``<dirname>/est_<image base name>.txt``, one
    CRLF-terminated line per box, holding the box coordinates flattened and
    cast to int32, joined by commas. An already existing result file is left
    untouched.

    Args:
        img_file (str): image file name; only its base name without the
            extension is used to build the result file name.
        boxes (iterable): detected boxes / polygons, each convertible with
            ``np.array(box)``.
        dirname (str): result directory; created (with parents) if missing.
        verticals: unused.
        texts (sequence of str, optional): transcription per box, indexed like
            ``boxes``. Only read when ``mode == 'train'``.
        save_img_ (bool): unused.
        mode (str): ``'train'`` appends a label field to every line (the text
            with ',' replaced by '.', or ``###`` when the text is empty or
            ``texts`` is None); any other value writes coordinates only.
    Return:
        None
    """
    filename, _ = os.path.splitext(os.path.basename(img_file))
    gt_file = os.path.join(dirname, "est_" + filename + '.txt')

    # makedirs copes with nested result paths and with a directory that is
    # created concurrently; os.mkdir would fail in both cases.
    os.makedirs(dirname, exist_ok=True)

    if cg.file_exists(gt_file):
        # Never overwrite an existing result file.
        return

    with open(gt_file, 'w') as f:
        for i, box in enumerate(boxes):
            poly = np.array(box).astype(np.int32).reshape((-1))
            str_result = ','.join([str(p) for p in poly])

            if mode == 'train':
                if texts is not None and texts[i] != '':
                    # ',' is the field separator, so it cannot stay in the label
                    label = texts[i].replace(',', '.')
                else:
                    label = '###'
                str_result += ',' + label

            f.write(str_result + '\r\n')
def main_crop(ini, common_info, logger=None):
    """Crop the text regions of the train and test images in parallel.

    Every value of ``ini`` is expanded with ``cs.replace_string_from_dict``
    using ``common_info``. Existing train / test crop directories are removed,
    then ``save_crop_images_by_reference_filepath`` is run through ``mp.run``
    for every reference gt file of the train and the test split.

    Args:
        ini (mapping): raw settings; must provide 'ref_train_path',
            'ref_test_path', 'train_crop_path' and 'test_crop_path' (plus the
            keys read by the worker function).
        common_info (mapping): substitution values; 'ref_dir_name' is also
            read, its part before the first '_' being used as model name.
        logger (logging.Logger, optional): progress logger; a module logger is
            used when omitted.

    Returns:
        bool: always True.
    """
    if logger is None:
        # The default argument used to crash on the first logger.info call.
        import logging
        logger = logging.getLogger(__name__)

    params = {
        key: cs.replace_string_from_dict(val, common_info)
        for key, val in ini.items()
    }

    model_name = common_info['ref_dir_name'].split('_')[0]

    ref_train_fpaths = sorted(
        cg.get_filepaths(params['ref_train_path'],
                         extensions=cg.TEXT_EXTENSIONS))
    ref_test_fpaths = sorted(
        cg.get_filepaths(params['ref_test_path'],
                         extensions=cg.TEXT_EXTENSIONS))
    logger.info(" [CROP] # Total ref. gt file size : {:d}.".format(
        len(ref_train_fpaths) + len(ref_test_fpaths)))

    # Start from clean crop directories.
    for label, path_key in (('Train', 'train_crop_path'),
                            ('Test', 'test_crop_path')):
        crop_path = params[path_key]
        if cg.file_exists(crop_path):
            shutil.rmtree(crop_path)
            logger.info(
                f" [CROP] # {label} crop dir. is already exist, it's removed !!! : {crop_path}"
            )

    # sched_getaffinity is not available on every platform.
    if hasattr(os, 'sched_getaffinity'):
        available_cpus = len(os.sched_getaffinity(0))
    else:
        available_cpus = os.cpu_count() or 1

    # Pair each split with its mode explicitly instead of comparing lists by
    # identity.
    for split_fpaths, tar_mode in ((ref_train_fpaths, TRAIN),
                                   (ref_test_fpaths, TEST)):
        mp_inputs = [(ref_fpath, params, tar_mode, model_name)
                     for ref_fpath in split_fpaths]

        # Multiprocess func.
        mp.run(func=save_crop_images_by_reference_filepath,
               data=mp_inputs,
               n_workers=available_cpus,
               n_tasks=len(split_fpaths),
               max_queue_size=len(split_fpaths))

    return True
Esempio n. 3
0
def compress_files(src_dir, dst_dir, zip_fname='temp.zip'):
    """Zip the direct entries of ``src_dir`` into ``dst_dir/zip_fname``.

    Entries are stored under their bare names (no directory prefix) with
    DEFLATE compression. Sub-directories are not walked recursively.

    Args:
        src_dir (str): directory whose entries are archived.
        dst_dir (str): directory receiving the archive.
        zip_fname (str): archive file name.

    Returns:
        bool: True when ``cg.file_exists`` reports the archive afterwards,
        False otherwise.
    """
    filenames = os.listdir(src_dir)
    zip_fpath = os.path.join(dst_dir, zip_fname)
    zip_abspath = os.path.abspath(zip_fpath)

    # The context manager closes the archive even when a write fails.
    with zipfile.ZipFile(zip_fpath, "w", zipfile.ZIP_DEFLATED) as archive:
        for filename in filenames:
            fpath = os.path.join(src_dir, filename)
            if os.path.abspath(fpath) == zip_abspath:
                # A stale archive living in src_dir must not be packed into
                # the archive that is currently being written.
                continue
            archive.write(fpath, filename)

    return bool(cg.file_exists(zip_fpath))
def crop_and_save_img(img_fpath, dirpath, save_path):
    """Cut an image at the bottom of its detected area and save the result.

    The target path is ``img_fpath`` with ``dirpath`` replaced by
    ``save_path``. When ``ip.get_binary_area_coordinates_by_threshold``
    reports an area, the image is cut at that area's ``max_y`` and a 30 px
    white border is added at the bottom; otherwise (including when the
    detection raises ValueError) the raw image is saved unchanged.

    Args:
        img_fpath (str): source image path.
        dirpath (str): part of ``img_fpath`` to substitute.
        save_path (str): replacement for ``dirpath``.

    Returns:
        bool: True when the image was written, False when the target file
        already exists.
    """
    rst_img_fpath = img_fpath.replace(dirpath, save_path)
    if cg.file_exists(rst_img_fpath):
        print(f'File already exists : {rst_img_fpath}')
        return False

    img = ig.imread(img_fpath)

    # Defaults keep both names bound when the detection raises; previously the
    # `if ret_` test below failed with NameError in that case.
    crop_box, ret_ = None, False
    try:
        crop_box, ret_ = ip.get_binary_area_coordinates_by_threshold(
            img, min_thresh=127, max_thresh=255)
    except ValueError as e:
        print(e)
        print()

    if ret_:
        (_, _), (_, max_y) = crop_box
        crop_img = img[:max_y, :]

        border_color = ig.WHITE
        border_margin = 30
        save_img = cv2.copyMakeBorder(crop_img,
                                      top=0,
                                      bottom=border_margin,
                                      left=0,
                                      right=0,
                                      borderType=cv2.BORDER_CONSTANT,
                                      value=border_color)
        print(f'Crop img is saved : {rst_img_fpath}')
    else:
        save_img = img
        print(f'Raw img is saved : {rst_img_fpath}')

    # Re-check right before writing: the file may have appeared meanwhile.
    if cg.file_exists(rst_img_fpath):
        print(f'File is exist ! : {rst_img_fpath}')
        return False

    ig.imwrite(save_img, rst_img_fpath)
    return True
Esempio n. 5
0
def main(args, logger=None):
    """Run a CRAFT network over a folder of images and evaluate the output.

    Weights are loaded from ``args.model_path``. For every image returned by
    ``file_utils.get_files(args.img_path)`` the score map is stored under
    ``<args.rst_path>/mask`` (unless it already exists) and the boxes are
    written through ``file_utils.saveResult15`` into ``<args.rst_path>/est``.
    Finally ``eval_dataset`` is called and the elapsed time is printed.
    ``logger`` is accepted but not used.
    """
    # Build the network and restore its weights.
    net = CRAFT(pretrained=False)

    print(f'Loading weights from checkpoint {args.model_path}')
    if args.cuda:
        checkpoint = torch.load(args.model_path)
    else:
        checkpoint = torch.load(args.model_path, map_location='cpu')
    net.load_state_dict(copyStateDict(checkpoint))

    if args.cuda:
        net = torch.nn.DataParallel(net.cuda())
        cudnn.benchmark = False

    net.eval()

    started_at = time.time()

    # Collect the test images and prepare the output folders.
    image_list, _, _ = file_utils.get_files(args.img_path)
    est_folder, mask_folder, eval_folder = (
        os.path.join(args.rst_path, sub_dir)
        for sub_dir in ('est', 'mask', 'eval'))
    for folder in (est_folder, mask_folder, eval_folder):
        cg.folder_exists(folder, create_=True)

    n_images = len(image_list)
    for position, image_path in enumerate(image_list, start=1):
        print(f"Test image {position:d}/{n_images:d}: {image_path:s}")
        image = imgproc.loadImage(image_path)
        bboxes, _polys, score_text = test_net(
            net,
            image,
            text_threshold=args.text_threshold,
            link_threshold=args.link_threshold,
            low_text=args.low_text,
            cuda=args.cuda,
            canvas_size=args.canvas_size,
            mag_ratio=args.mag_ratio,
            poly=args.poly,
            show_time=args.show_time)

        # Store the score map once per image.
        filename, _ = os.path.splitext(os.path.basename(image_path))
        mask_file = f"{mask_folder}/res_{filename}_mask.jpg"
        if not cg.file_exists(mask_file):
            cv2.imwrite(mask_file, score_text)

        file_utils.saveResult15(image_path,
                                bboxes,
                                dirname=est_folder,
                                mode='test')

    eval_dataset(est_folder=est_folder,
                 gt_folder=args.gt_path,
                 eval_folder=eval_folder,
                 dataset_type=args.dataset_type)
    print(f"elapsed time : {time.time() - started_at}s")
def main_split(ini, common_info, logger=None):
    """Split the CRNN ``labels.txt`` into train / test label files.

    A label line goes to the train (resp. test) list when the reference gt
    file derived from its first tab-separated field exists among the files of
    'ref_train_path' (resp. 'ref_test_path'); lines matching neither are
    dropped. The two lists are written to ``labels.txt`` inside
    'train_gt_path' and 'test_gt_path'.

    Args:
        ini (mapping): raw settings, expanded with
            ``cs.replace_string_from_dict``; must provide 'gt_path',
            'train_gt_path', 'test_gt_path', 'ref_train_path' and
            'ref_test_path'.
        common_info (mapping): substitution values; 'ref_dir_name' decides
            whether reference files carry a 'gt_' prefix (when it contains
            'craft').
        logger (logging.Logger, optional): progress logger; a module logger is
            used when omitted.

    Returns:
        bool: always True.
    """
    if logger is None:
        # The default argument used to crash on the first logger.info call.
        import logging
        logger = logging.getLogger(__name__)

    params = {
        key: cs.replace_string_from_dict(val, common_info)
        for key, val in ini.items()
    }

    cg.directory_exists(params['train_gt_path'], create_=True)
    cg.directory_exists(params['test_gt_path'], create_=True)

    if cg.file_exists(params['train_gt_path']):
        print(" @ Warning: train gt file path, {}, already exists".format(
            params["train_gt_path"]))
    if cg.file_exists(params['test_gt_path']):
        print(" @ Warning: test gt file path, {}, already exists".format(
            params["test_gt_path"]))

    # read gt. file
    with open(os.path.join(params['gt_path'], "labels.txt"),
              "r",
              encoding="utf8") as f:
        crnn_gt_list = f.readlines()

    # Reference files of each split; sets give O(1) membership tests.
    ref_train_set = set(
        cg.get_filepaths(params['ref_train_path'],
                         extensions=cg.TEXT_EXTENSIONS))
    ref_test_set = set(
        cg.get_filepaths(params['ref_test_path'],
                         extensions=cg.TEXT_EXTENSIONS))

    gt_prefix = 'gt_' if 'craft' in common_info['ref_dir_name'] else ''

    crnn_train_list = []
    crnn_test_list = []
    for crnn_gt in crnn_gt_list:
        # Drop the last 13 characters of the crop file name -- presumably a
        # '_crop_NNN.jpg' suffix (verify against the crop writer) -- to get
        # the name of the source image.
        crnn_fname = crnn_gt.split('\t')[0][:-13] + '.txt'
        ref_train_fname = os.path.join(params['ref_train_path'],
                                       gt_prefix + crnn_fname)
        ref_test_fname = os.path.join(params['ref_test_path'],
                                      gt_prefix + crnn_fname)

        if ref_train_fname in ref_train_set:
            crnn_train_list.append(crnn_gt)
        elif ref_test_fname in ref_test_set:
            crnn_test_list.append(crnn_gt)

    # Write with the encoding the labels were read with, so that non-ASCII
    # text survives on platforms whose default encoding is not UTF-8.
    train_fpath = os.path.join(params['train_gt_path'], 'labels.txt')
    with open(train_fpath, 'w', encoding="utf8") as f:
        f.write(''.join(crnn_train_list))

    test_fpath = os.path.join(params['test_gt_path'], 'labels.txt')
    with open(test_fpath, 'w', encoding="utf8") as f:
        f.write(''.join(crnn_test_list))

    # Guard the percentage against an empty label file.
    n_total = len(crnn_gt_list)
    train_pct = int(len(crnn_train_list) / n_total * 100) if n_total else 0
    test_pct = int(len(crnn_test_list) / n_total * 100) if n_total else 0
    logger.info(" [SPLIT] # Train : Test ratio -> {} % : {} %".format(
        train_pct, test_pct))
    logger.info(" [SPLIT] # Train : Test size  -> {} : {}".format(
        len(crnn_train_list), len(crnn_test_list)))
    return True
def save_crop_images_by_reference_filepath(
    ref_fpath, vars, tar_mode, model_name=ModelName.YOLOv5.name.lower()):
    """Crop the boxes listed in one reference gt file out of its image.

    The gt file holds one box per line as five space-separated fields
    ``class x_center y_center width height`` with coordinates relative to the
    image size. Only boxes whose class equals ``ObjNum.KO.value`` are cropped;
    each crop is saved as ``<image name>_crop_NNN.jpg`` in the crop directory
    of the given mode.

    Args:
        ref_fpath (str): path of the reference gt text file.
        vars (mapping): settings providing '<mode>_img_path' and
            '<mode>_crop_path' for the lower-cased ``tar_mode``.
        tar_mode (str): split name; lower-cased to build the ``vars`` keys.
        model_name (str): lower-cased ``ModelName`` member name; for CRAFT the
            'gt_' marker is stripped from the reference file name.

    Returns:
        bool: False when the raw image is missing, True otherwise.

    Raises:
        ValueError: if ``model_name`` is neither CRAFT nor YOLOv5.
    """
    # Load img info
    _, core_name, _ = cg.split_filepath(ref_fpath)
    if model_name == ModelName.CRAFT.name.lower():
        img_fname = core_name.replace('gt_', '')
    elif model_name == ModelName.YOLOv5.name.lower():
        img_fname = core_name
    else:
        # Previously surfaced later as an unbound-variable error.
        raise ValueError(f"Unsupported model name: {model_name!r}")

    low_tar_mode = tar_mode.lower()  # train / test
    raw_img_path = os.path.join(vars[f'{low_tar_mode}_img_path'],
                                img_fname + '.jpg')

    if not (cg.file_exists(raw_img_path, print_=True)):
        print("  # Raw image doesn't exists at {}".format(raw_img_path))
        return False

    img = ig.imread(raw_img_path, color_fmt='RGB')
    h, w = img.shape[:2]

    # Parse the gt file into pixel boxes.
    text_boxes = []
    with open(ref_fpath, "r", encoding="utf8") as f:
        for ref_info in f:
            coco_data = ref_info.replace('\n', '').split(' ')
            if len(coco_data) != 5:
                continue
            if int(coco_data[0]) != ObjNum.KO.value:
                continue

            x_center, y_center = float(coco_data[1]) * w, float(coco_data[2]) * h
            half_w, half_h = float(coco_data[3]) * w / 2, float(coco_data[4]) * h / 2

            # Clamp to the image: a negative start index would make the numpy
            # slice wrap around and crop the wrong region.
            min_x = max(int(x_center - half_w), 0)
            max_x = min(int(x_center + half_w), w)
            min_y = max(int(y_center - half_h), 0)
            max_y = min(int(y_center + half_h), h)

            text_boxes.append([[min_x, min_y], [max_x, max_y]])

    if not text_boxes:
        print(f"  #  Reference gt is empty !!! : {ref_fpath}")
        return True

    n_boxes = len(text_boxes)
    for t_idx, ((min_x, min_y), (max_x, max_y)) in enumerate(text_boxes):
        crop_img_fname = img_fname + '_crop_' + '{0:03d}'.format(t_idx)
        rst_fpath = os.path.join(vars[f'{low_tar_mode}_crop_path'],
                                 crop_img_fname + '.jpg')

        if max_x <= min_x or max_y <= min_y:
            # Nothing to crop; the index is kept so later crops keep their
            # original numbering.
            print("  #  ({:d}/{:d}) Skipped empty box for {} ".format(
                t_idx + 1, n_boxes, rst_fpath))
            continue

        crop_img = img[min_y:max_y, min_x:max_x]

        ig.imwrite(crop_img, rst_fpath)
        # Report the position of this crop, not the stale parsing index.
        print("  #  ({:d}/{:d}) Saved at {} ".format(
            t_idx + 1, n_boxes, rst_fpath))

    return True
Esempio n. 8
0
def main_split(ini, common_info, logger=None):
    """Split the image list into train / val text files and update the yaml.

    Steps: make sure the image folder is populated (symlinking the raw images
    into it when it holds no image), split the image names with
    ``train_test_split`` (fixed random state), write the two lists, then
    rewrite the reference yaml with the list paths, the class names and the
    class count.

    Args:
        ini (mapping): raw settings, expanded with
            ``cs.replace_string_from_dict``; must provide 'img_path',
            'raw_path', 'train_path', 'val_path', 'train_ratio',
            'ref_yaml_path' and 'rst_yaml_path'.
        common_info (mapping): substitution values; 'obj_names' (comma
            separated) supplies the class names.
        logger (logging.Logger, optional): progress logger; a module logger is
            used when omitted.

    Returns:
        bool: always True. The process exits through ``sys.exit`` when the
        user declines to overwrite an existing list file.
    """
    if logger is None:
        # The default argument used to crash on the first logger.info call.
        import logging
        logger = logging.getLogger(__name__)

    params = {
        key: cs.replace_string_from_dict(val, common_info)
        for key, val in ini.items()
    }

    cg.folder_exists(params['img_path'], create_=False)

    # Resolve the output paths once, so that the overwrite check looks at the
    # very files that are written below.
    train_path = os.path.join(_project_folder_, params['train_path'])
    val_path = os.path.join(_project_folder_, params['val_path'])

    for label, list_path in (('train', train_path), ('test', val_path)):
        if cg.file_exists(list_path):
            print(" @ Warning: {} text file path, {}, already exists".format(
                label, list_path))
            ans = input(" % Proceed (y/n) ? ")
            if ans.lower() != 'y':
                sys.exit()

    # Apply symbolic link for img path
    raw_path = os.path.join(_project_folder_, params['raw_path'])
    img_path = os.path.join(_project_folder_, params['img_path'])
    cg.folder_exists(img_path, create_=True)

    img_fnames = sorted(
        cg.get_filenames(img_path, extensions=ig.IMG_EXTENSIONS))
    if len(img_fnames) == 0:
        # NOTE(review): shell string built from configuration values; paths
        # containing spaces or shell metacharacters will break this command.
        sym_cmd = "ln -s {} {}".format(raw_path + '*',
                                       img_path)  # to all files
        subprocess.call(sym_cmd, shell=True)

        img_fnames = sorted(
            cg.get_filenames(img_path, extensions=ig.IMG_EXTENSIONS))

    train_ratio = float(params['train_ratio'])
    test_ratio = (1.0 - train_ratio)

    train_img_list, test_img_list = train_test_split(img_fnames,
                                                     test_size=test_ratio,
                                                     random_state=2000)
    # Save train / val list files
    with open(train_path, 'w') as f:
        f.write('\n'.join(train_img_list) + '\n')

    with open(val_path, 'w') as f:
        f.write('\n'.join(test_img_list) + '\n')

    logger.info(" [SPLIT] # Train : Test ratio -> {} : {}".format(
        train_ratio, test_ratio))
    logger.info(" [SPLIT] # Train : Test size  -> {} : {}".format(
        len(train_img_list), len(test_img_list)))

    # Modify yaml file
    ref_yaml_path = os.path.join(_project_folder_, params['ref_yaml_path'])
    with open(ref_yaml_path, 'r') as f:
        data = yaml.safe_load(f)

    data['train'] = train_path
    data['val'] = val_path
    data['names'] = common_info['obj_names'].replace(' ', '').split(',')
    data['nc'] = len(data['names'])

    # Save yaml file
    rst_yaml_path = os.path.join(_project_folder_, params['rst_yaml_path'])
    with open(rst_yaml_path, 'w') as f:
        yaml.dump(data, f)
        pprint(data)

    logger.info(" # {} in {} mode finished.".format(_this_basename_, OP_MODE))
    return True
Esempio n. 9
0
def main_generate(ini, common_info, logger=None):
    """Generate one label text file per raw image from its json annotation.

    For every raw image the annotation whose core name equals the image file
    name (core name + extension) is loaded, and every object whose
    'classTitle' is listed in ``common_info['obj_names']`` is written as a
    line ``class x_center y_center width height`` with coordinates divided by
    the image width / height. Images whose label file already exists are
    skipped.

    Args:
        ini (mapping): raw settings, expanded with
            ``cs.replace_string_from_dict``; must provide 'label_path',
            'raw_path' and 'ann_path'.
        common_info (mapping): substitution values; 'obj_names' (comma
            separated) and 'obj_type' are also read.
        logger (logging.Logger, optional): progress logger; a module logger is
            used when omitted.

    Returns:
        bool: always True.
    """
    if logger is None:
        # The default argument used to crash on the first logger.info call.
        import logging
        logger = logging.getLogger(__name__)

    params = {
        key: cs.replace_string_from_dict(val, common_info)
        for key, val in ini.items()
    }

    label_path = os.path.join(_project_folder_, params['label_path'])
    cg.folder_exists(label_path, create_=True)

    raw_path = os.path.join(_project_folder_, params['raw_path'])
    ann_path = os.path.join(_project_folder_, params['ann_path'])
    raw_fnames = sorted(
        cg.get_filenames(raw_path, extensions=ig.IMG_EXTENSIONS))
    ann_fnames = sorted(
        cg.get_filenames(ann_path, extensions=jg.META_EXTENSION))
    n_total = len(raw_fnames)
    logger.info(
        " [GENERATE] # Total file number to be processed: {:d}.".format(
            n_total))

    # Look annotations up by name instead of by list position: pairing by
    # index breaks as soon as one image or annotation is missing.
    ann_by_core_name = {
        cg.split_fname(ann_fname)[1]: ann_fname
        for ann_fname in ann_fnames
    }

    obj_names = common_info['obj_names'].replace(' ', '').split(',')
    obj_type = common_info['obj_type']

    for idx, raw_fname in enumerate(raw_fnames):
        _, raw_core_name, raw_ext = cg.split_fname(raw_fname)
        rst_fpath = os.path.join(label_path, raw_core_name + '.txt')

        # Decide once per image. Checking per object meant that only the
        # first object of an image was ever written.
        if cg.file_exists(rst_fpath):
            logger.info(
                " [GENERATE] # File already exist {} ({:d}/{:d})".format(
                    rst_fpath, (idx + 1), n_total))
            continue

        ann_fname = ann_by_core_name.get(raw_core_name + raw_ext)
        if ann_fname is None:
            # Without this guard the objects of the previous image were
            # reused for this one.
            logger.warning(
                " [GENERATE] # Annotation not found for {} ({:d}/{:d})".format(
                    raw_fname, (idx + 1), n_total))
            continue

        img = ig.imread(raw_fname, color_fmt='RGB')
        h, w = img.shape[:2]

        # Load json
        with open(ann_fname) as json_file:
            objects = json.load(json_file)['objects']

        # Convert every wanted object to a normalized centre / size line.
        label_lines = []
        for obj in objects:
            obj_name = obj['classTitle']
            if obj_name not in obj_names:
                continue

            class_num = ObjInfo(obj_type, obj_name).get_class_number()

            [x1, y1], [x2, y2] = obj['points']['exterior']
            x_min, y_min = int(min(x1, x2)), int(min(y1, y2))
            x_max, y_max = int(max(x1, x2)), int(max(y1, y2))
            if x_max - x_min <= 0 or y_max - y_min <= 0:
                continue

            label_lines.append("{} {} {} {} {}\r\n".format(
                str(class_num),
                str(((x_max + x_min) / 2) / w),
                str(((y_max + y_min) / 2) / h),
                str((x_max - x_min) / w),
                str((y_max - y_min) / h)))

        if not label_lines:
            # As before, no file is created for an image without usable boxes.
            continue

        with open(rst_fpath, 'a') as f:
            f.writelines(label_lines)

        logger.info(
            " [GENERATE] # File is saved {} ({:d}/{:d})".format(
                rst_fpath, (idx + 1), n_total))

    logger.info(" # {} in {} mode finished.".format(_this_basename_, OP_MODE))
    return True