def test_folder(self, folder_path):
    image_list, _, _ = file_utils.get_files(folder_path)
    if not os.path.exists(pr.result_folder):
        os.mkdir(pr.result_folder)
    t = time.time()

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        bboxes, polys, score_text = self.text_detect(image_path)

        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = pr.result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)

        image = imgproc.loadImage(image_path)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=pr.result_folder)

    print("elapsed time : {}s".format(time.time() - t))
def get_content_dict(answers_dir, spliter):
    total_content_dict = {}
    for content_file in file_utils.get_files(answers_dir):
        content_dict = dict(file_utils.read_line(content_file,
                                                 lambda content: (content[0], content[1]),
                                                 split=spliter))
        total_content_dict.update(content_dict)
    return total_content_dict
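A minimal call sketch for the function above, assuming each answer file holds one key/value pair per line separated by a tab; the directory name and separator are illustrative, not taken from the source project.

# Hypothetical usage: merge every file under answers/ into one id -> answer mapping.
# 'answers/' and '\t' are assumed placeholders for the real data layout.
content_dict = get_content_dict('answers/', '\t')
print(len(content_dict), "entries loaded")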
def from_files(cls, path, extensions=None, recurse=True, include=None, **kwargs):
    audio_extensions = set(k for k, v in mimetypes.types_map.items() if v.startswith('audio/'))
    if extensions is None:
        extensions = audio_extensions
    return cls(file_utils.get_files(path, extensions, recurse=recurse, include=include),
               path, **kwargs)
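The default extension set is derived from the standard-library `mimetypes` table. A small standalone sketch of the same expression shows what it produces; the exact contents vary by platform.

import mimetypes

# Same derivation as in from_files: keep every extension whose MIME type is audio/*.
audio_extensions = {k for k, v in mimetypes.types_map.items() if v.startswith('audio/')}
print(sorted(audio_extensions))  # e.g. ['.aif', '.mp3', '.wav', ...] depending on the platform table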
def collect_dataset(directory):
    raw_text = ''
    raw_dataset = {}
    for folder in file_utils.get_folders(directory):
        folder_path = directory + folder + '/'
        new_samples = {}
        for txt_file in file_utils.get_files(folder_path, obj_type='txt'):
            t, lines = file_utils.load_text(txt_file, folder_path)
            new_samples[txt_file[:-4]] = lines
            raw_text += ' ' + t
        raw_dataset[folder] = new_samples
    return raw_text, raw_dataset
def make_one(file_dir='../data/train_blocks_3_layer'):
    files = fu.get_files(file_dir)
    fu.safe_dir('blocks_1_layer')
    for im_name in files[:5]:
        print('working on: ', im_name)
        im_path = os.path.join(file_dir, im_name)
        im = io.imread(im_path)
        if len(im.shape) == 2:
            print('shape already 2d: ', im.shape)
            continue
        # keep only the first channel and overwrite the image in place
        im_pil = Image.fromarray(im[:, :, 0])
        im_pil.save(im_path)
    print('done')
def detect_img(yolo):
    cnt = 0
    while True:
        if cnt > 3:
            break
        cnt += 1
        img_p = input('Input image path:')
        files = file_utils.get_files(img_p)
        print("path:", img_p, "number of images:", len(files))
        output_path = input('output image path:')
        for fp in files:
            print("predicting image:", fp)
            image = Image.open(fp)
            r_image = yolo.detect_image(image)
            base_name = os.path.basename(fp)
            r_image.save(os.path.join(output_path, base_name))
        is_continue = input("continue? y/n")
        if is_continue == 'n':
            break
    yolo.close_session()
def verify_download_files(c_id):
    file_list = get_files('video_cache_all\\586d23485f07127674135d4d')
    file_name_dict = dict()
    file_count_dict = dict()
    for filename in file_list:
        elements = filename.split('\\')
        key = "{0}-{1}".format(elements[2], elements[3])
        # file count
        if key not in file_count_dict:
            file_count_dict[key] = 0
        count = file_count_dict[key]
        file_count_dict[key] = count + 1
        # file name
        file_name_dict[key] = elements[4]
    for key, name in file_name_dict.items():
        count = file_count_dict[key]
        if name != "Wanmen-{0}.ts".format("%05d" % (count - 1)):
            print("{}: filename: {}, count: {}\n".format(key, name, count))
if __name__ == "__main__": net = CRAFT(input_shape=(args.canvas_size, args.canvas_size, 3)) checkpoint = tf.train.Checkpoint(model=net) checkpoint_dir = tf.train.latest_checkpoint(args.weight_dir) # checkpoint_dir = os.path.join(args.weight_dir, "ckpt-10") checkpoint.restore(checkpoint_dir) print("Restored from %s" % checkpoint_dir) # prefix_filename = datetime.datetime.now().strftime('%m%d_%H:%M:%S') prefix_filename = checkpoint_dir.split("/")[-1] # if not os.path.exists("./logs/fit/"): # os.makedirs("logs/fit/") # log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) image_list, _, _ = get_files(args.test_folder) result_folder = './result/' if not os.path.isdir(result_folder): os.mkdir(result_folder) # LinkRefiner refine_net = None # if args.refine: # # TODO # pass # load data for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
def __init__(self, t_dir, res_dir='results'):
    self.t_dir = t_dir
    self.res_dir = res_dir
    self.file_list = fu.get_files(t_dir, 'results', 'results_np', 'res_results')
    self.num_files = len(self.file_list)
parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda for inference')
parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
parser.add_argument('--test_folder', default='/imgs/', type=str, help='folder path to input images')
parser.add_argument('--refine', default=False, action='store_true', help='enable link refiner')
parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str,
                    help='pretrained refiner model')

args = parser.parse_args()

""" For test images in a folder """
image_list, _, _ = file_utils.get_files(args.test_folder)

result_folder = './result/'
if not os.path.isdir(result_folder):
    os.mkdir(result_folder)


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
from utils.document import Document, extract_labels, merge_dicts
from utils.file_utils import get_files, save_files
import time
from tqdm import tqdm
from multiprocessing import Process, Queue

if __name__ == '__main__':
    start = time.time()
    # Indicate the path to the files from where the texts
    # and labels need to be extracted.
    main_dir = '/Users/samuelrodriguezmedina/Documents/ir4sdgs/crawl_sdgs/'
    folders = [
        'word', 'other_html', 'pdf', 'extra_pdf', 'extra_word', 'downloads',
        'downloadable_pdfs'
    ]
    files = get_files(main_dir, folders)
    final_labelled = {}
    final_unlabelled = {}
    q = Queue()

    for file in tqdm(files):
        # wrap the raw file so extract_labels can process it in the worker process
        doc = Document(file)
        p = Process(target=extract_labels, args=(doc, q,))
        p.start()
        labelled, unlabelled = q.get()
        if labelled:
            final_labelled = merge_dicts(final_labelled, labelled)
        if unlabelled:
            final_unlabelled = {**final_unlabelled, **unlabelled}
for i, box in enumerate(polys_list[idx]):
    # rescale box coordinates back to the original image size
    box[:, 0] *= img_ratio[idx][1] * 2
    box[:, 1] *= img_ratio[idx][0] * 2
    # warp_img = mask_crop.imgWrapAffine(img, np.int0(box))
    # cv2.imwrite(''.join(['./result/warp_img_', str(i), '.jpg']), warp_img)
    poly = np.array(box).astype(np.int32).reshape((-1))
    point_list2.append(','.join([str(p) for p in poly]) + ',1\n')
    f.write(','.join([str(p) for p in poly]) + ',1\n')
    cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
cv2.imwrite(''.join(['./result/', str(time.time()), '.jpg']), img)


if __name__ == '__main__':
    TextDetect = TextDetection('weights/craft_mlt_25k.pth')
    set_batch = False
    s = time.time()
    imgs = [
        io.imread(x, pilmode='RGB') for x in file_utils.get_files('imgs')[0]
    ]
    if set_batch:
        TextDetect.detect(imgs, set_batch)
    else:
        for img in imgs:
            TextDetect.detect([img], set_batch)
    print(time.time() - s)
def collect_new_dict(dict_dir: str, dest_dict_file: str):
    new_dict_files = file_utils.get_files(dict_dir)
    word_counter = Counter([verify(word)
                            for dict_file in new_dict_files
                            for word in file_utils.read_line(dict_file)])
    # list_utils.print_list(word_counter.keys())
    file_utils.save_list2file(word_counter.keys(), dest_dict_file)
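For reference, a small sketch of the Counter-based deduplication step with in-memory data instead of files; `verify` is stubbed as a simple strip here, which is an assumption about the project's real normalisation.

from collections import Counter

# Stand-in for the project's verify(); assumed to normalise each word.
verify = lambda w: w.strip()

words = ['apple', 'apple', 'banana', 'apple ']
word_counter = Counter(verify(w) for w in words)
print(list(word_counter.keys()))  # ['apple', 'banana'] -- unique, insertion-ordered keys
print(word_counter['apple'])      # 3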
import os, sys
from docutils.nodes import copyright
from jinja2.utils import object_type_repr

sys.path.append(os.path.abspath('..'))

import numpy as np
import h5py
import json
import argparse, random

from utils import file_utils

base = '/home/rfelixmg/Dropbox/PROJETOS/ZSL_DS_SJE/experiments/cub/'
file_names = file_utils.get_files(base, 'txt')

format_result = []
for file_name in file_names:
    result = None
    with open(base + file_name, 'r') as in_:
        result = json.load(in_)

    evaluation = result['evaluation']
    configuration = result['configuration']

    line = [str(result['configuration']['output_file'].split('/')[-1][:-4]),
            result['evaluation']['accuracy_test'],
            result['evaluation']['coeficient_determination_test'],
            result['evaluation']['precision_test'],
            result['evaluation']['recall_test'],