Example #1
    def test_folder(self, folder_path):

        image_list, _, _ = file_utils.get_files(folder_path)
        if not os.path.exists(pr.result_folder):
            os.mkdir(pr.result_folder)
        t = time.time()

        # load data
        for k, image_path in enumerate(image_list):
            print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                      image_path),
                  end='\r')

            bboxes, polys, score_text = self.text_detect(image_path)

            # save score text
            filename, file_ext = os.path.splitext(os.path.basename(image_path))
            mask_file = pr.result_folder + "/res_" + filename + '_mask.jpg'
            cv2.imwrite(mask_file, score_text)
            image = imgproc.loadImage(image_path)
            file_utils.saveResult(image_path,
                                  image[:, :, ::-1],
                                  polys,
                                  dirname=pr.result_folder)

        print("elapsed time : {}s".format(time.time() - t))
Example #2
def get_content_dict(answers_dir, splitter):
    total_content_dict = {}
    for content_file in file_utils.get_files(answers_dir):
        content_dict = dict(
            file_utils.read_line(content_file,
                                 lambda content: (content[0], content[1]),
                                 split=splitter))
        total_content_dict.update(content_dict)

    return total_content_dict
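The call above implies a file_utils.read_line that splits each line of a file and maps the fields through a transform. A sketch consistent with that usage; the signature and default separator are assumptions:

def read_line(path, transform=lambda parts: parts, split='\t'):
    # Hypothetical reader matching the call above: split every line on
    # `split` and pass the resulting fields through `transform`.
    with open(path, encoding='utf-8') as f:
        return [transform(line.rstrip('\n').split(split)) for line in f]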
Example #3
    def from_files(cls,
                   path,
                   extensions=None,
                   recurse=True,
                   include=None,
                   **kwargs):
        audio_extensions = set(k for k, v in mimetypes.types_map.items()
                               if v.startswith('audio/'))

        if extensions is None:
            extensions = audio_extensions
        return cls(
            file_utils.get_files(path,
                                 extensions,
                                 recurse=recurse,
                                 include=include), path, **kwargs)
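A hypothetical usage of this classmethod; AudioItemList (the enclosing, unshown class) and the data path are made up for illustration:

# Hypothetical: AudioItemList is the class that defines from_files above.
items = AudioItemList.from_files('data/speech', recurse=True)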
Example #4
def collect_dataset(directory):

    raw_text = ''
    raw_dataset = {}
    for folder in file_utils.get_folders(directory):
        folder_path = directory + folder + '/'
        new_samples = {}
        for txt_file in file_utils.get_files(folder_path, obj_type='txt'):
            t, lines = file_utils.load_text(txt_file, folder_path)
            new_samples[txt_file[:-4]] = lines
            raw_text += ' ' + t

        raw_dataset[folder] = new_samples

    return raw_text, raw_dataset
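For reference, the returned structures nest by folder and file stem; the folder and file names below are hypothetical:

raw_text, raw_dataset = collect_dataset('corpus/')
# raw_dataset maps folder -> {file stem -> lines}, for example:
# {'news': {'article_01': ['first line', 'second line'], ...}, ...}
# raw_text is every file's text concatenated, space-separated.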
Example #5
def make_one(file_dir='../data/train_blocks_3_layer'):

    files = fu.get_files(file_dir)
    fu.safe_dir('blocks_1_layer')

    for im_name in files[:5]:
        print('working on: ', im_name)
        im_path = os.path.join(file_dir, im_name)
        im = io.imread(im_path)
        if len(im.shape) == 2:
            print('shape already 2d: ', im.shape)
            continue

        im_pil = Image.fromarray(im[:, :, 0])
        im_pil.save(im_path)

    print('done')
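Taking channel 0 is lossless only when all three channels are identical, i.e. a grayscale image stored as RGB. A quick self-contained check of that slicing trick:

import numpy as np

gray = (np.eye(4) * 255).astype(np.uint8)    # synthetic 4x4 grayscale
rgb = np.dstack([gray, gray, gray])          # stored as identical channels
assert rgb.shape == (4, 4, 3)
assert np.array_equal(rgb[:, :, 0], gray)    # same slice as im[:, :, 0] above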
Example #6
def detect_img(yolo):
    cnt = 0
    while True:
        if cnt > 3:
            break
        cnt += 1
        img_p = input('Input image path:')
        files = file_utils.get_files(img_p)
        print("路径:", img_p, "中图片数量:", len(files))
        output_path = input('output image path:')
        for fp in files:
            print("预测图片:", fp)
            image = Image.open(fp)
            r_image = yolo.detect_image(image)
            base_name = os.path.basename(fp)
            r_image.save(os.path.join(output_path, base_name))
            is_continue = input("Continue? y/n: ")
            if is_continue == 'n':
                break
    yolo.close_session()
Example #7
def verify_download_files(c_id):
    file_list = get_files('video_cache_all\\{}'.format(c_id))

    file_name_dict = dict()
    file_count_dict = dict()

    for filename in file_list:
        elements = filename.split('\\')
        key = "{0}-{1}".format(elements[2], elements[3])

        # file count
        if key not in file_count_dict:
            file_count_dict[key] = 0
        count = file_count_dict[key]
        file_count_dict[key] = count + 1

        # file name
        file_name_dict[key] = elements[4]

    for key, name in file_name_dict.items():
        count = file_count_dict[key]
        if name != "Wanmen-{0}.ts".format("%05d" % (count - 1)):
            print("{}: filename: {}, count: {}\n".format(key, name, count))
Example #8
if __name__ == "__main__":
    net = CRAFT(input_shape=(args.canvas_size, args.canvas_size, 3))
    checkpoint = tf.train.Checkpoint(model=net)
    checkpoint_dir = tf.train.latest_checkpoint(args.weight_dir)
    # checkpoint_dir = os.path.join(args.weight_dir, "ckpt-10")
    checkpoint.restore(checkpoint_dir)
    print("Restored from %s" % checkpoint_dir)
    # prefix_filename = datetime.datetime.now().strftime('%m%d_%H:%M:%S')
    prefix_filename = checkpoint_dir.split("/")[-1]

    # if not os.path.exists("./logs/fit/"):
    #     os.makedirs("logs/fit/")
    # log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    image_list, _, _ = get_files(args.test_folder)
    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)

    # LinkRefiner
    refine_net = None
    # if args.refine:
    #     # TODO
    #     pass

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path),
              end='\r')
Example #9
    def __init__(self, t_dir, res_dir='results'):
        self.t_dir = t_dir
        self.res_dir = res_dir
        self.file_list = fu.get_files(t_dir, 'results', 'results_np',
                                      'res_results')
        self.num_files = len(self.file_list)
Example #10
parser = argparse.ArgumentParser(description='CRAFT Text Detection')
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda for inference')
parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
parser.add_argument('--test_folder', default='/imgs/', type=str, help='folder path to input images')
parser.add_argument('--refine', default=False, action='store_true', help='enable link refiner')
parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str, help='pretrained refiner model')

args = parser.parse_args()


""" For test images in a folder """
image_list, _, _ = file_utils.get_files(args.test_folder)

result_folder = './result/'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)

def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
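imgproc.normalizeMeanVariance here follows the usual CRAFT preprocessing: zero-center each channel with a per-channel mean and scale by a per-channel variance, on 0-255 RGB input. A sketch, with the constants as commonly used in CRAFT code but treated as assumptions:

import numpy as np

def normalizeMeanVariance(img, mean=(0.485, 0.456, 0.406),
                          variance=(0.229, 0.224, 0.225)):
    # Zero-center and scale each channel; img is float-compatible 0-255 RGB.
    out = img.copy().astype(np.float32)
    out -= np.array([m * 255.0 for m in mean], dtype=np.float32)
    out /= np.array([v * 255.0 for v in variance], dtype=np.float32)
    return out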
Example #11
from utils.document import Document, extract_labels, merge_dicts
from utils.file_utils import get_files, save_files
import time
from tqdm import tqdm
from multiprocessing import Process, Queue

if __name__ == '__main__':
    start = time.time()
    # Indicate the path to the files from where the texts
    # and labels need to be extracted.
    main_dir = '/Users/samuelrodriguezmedina/Documents/ir4sdgs/crawl_sdgs/'
    folders = [
        'word', 'other_html', 'pdf', 'extra_pdf', 'extra_word', 'downloads',
        'downloadable_pdfs'
    ]
    files = get_files(main_dir, folders)
    final_labelled = {}
    final_unlabelled = {}
    q = Queue()

    for file in tqdm(files):
        doc = Document(file)  # build the document the worker consumes (Document ctor assumed)
        p = Process(target=extract_labels, args=(doc, q))
        p.start()
        labelled, unlabelled = q.get()
        if labelled:
            final_labelled = merge_dicts(final_labelled, labelled)
        if unlabelled:
            final_unlabelled = {**final_unlabelled, **unlabelled}
Example #12
                for i, box in enumerate(polys_list[idx]):
                    box[:, 0] *= img_ratio[idx][1] * 2
                    box[:, 1] *= img_ratio[idx][0] * 2
                    # warp_img = mask_crop.imgWrapAffine(img, np.int0(box))
                    # cv2.imwrite(''.join(['./result/warp_img_', str(i), '.jpg']), warp_img)
                    poly = np.array(box).astype(np.int32).reshape((-1))
                    line = ','.join(str(p) for p in poly) + ',1\n'
                    point_list2.append(line)
                    f.write(line)
                    cv2.polylines(img, [poly.reshape((-1, 1, 2))],
                                  True,
                                  color=(0, 0, 255),
                                  thickness=2)
            cv2.imwrite(''.join(['./result/', str(time.time()), '.jpg']), img)


if __name__ == '__main__':
    TextDetect = TextDetection('weights/craft_mlt_25k.pth')
    set_batch = False

    s = time.time()
    imgs = [
        io.imread(x, pilmode='RGB') for x in file_utils.get_files('imgs')[0]
    ]
    if set_batch:
        TextDetect.detect(imgs, set_batch)
    else:
        for img in imgs:
            TextDetect.detect([img], set_batch)
    print(time.time() - s)
Example #13
def collect_new_dict(dict_dir: str, dest_dict_file: str):
    new_dict_files = file_utils.get_files(dict_dir)
    word_counter = Counter([verify(word)
                            for dict_file in new_dict_files
                            for word in file_utils.read_line(dict_file)])
    # list_utils.print_list(word_counter.keys())
    file_utils.save_list2file(word_counter.keys(), dest_dict_file)
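verify is not shown in this example; presumably it normalizes each word before counting. A placeholder consistent with the call above (the name exists in the source, but this behavior is an assumption):

def verify(word: str) -> str:
    # Hypothetical normalizer so duplicate entries collapse in the Counter.
    return word.strip()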
Example #14
import os, sys

sys.path.append(os.path.abspath('..'))

import numpy as np
import h5py
import json
import argparse, random
from utils import file_utils


base = '/home/rfelixmg/Dropbox/PROJETOS/ZSL_DS_SJE/experiments/cub/'
file_names = file_utils.get_files(base, 'txt')
format_result = []

for file_name in file_names:
    result = None
    with open(base + file_name, 'r') as in_:
        result = json.load(in_)

    evaluation = result['evaluation']
    configuration = result['configuration']

    line = [str(result['configuration']['output_file'].split('/')[-1][:-4]),
            result['evaluation']['accuracy_test'],
            result['evaluation']['coeficient_determination_test'],
            result['evaluation']['precision_test'],
            result['evaluation']['recall_test'],