def main(args):
    """Hand every contour that passes the area filter to ``visual``.

    ``args`` is indexed with the keys ``'image'`` and ``'width'``. The image,
    contours and pixels-per-metric value all come from ``mark_countor``.
    """
    image, contours, pixels_per_metric = mark_countor(args['image'])

    for contour in contours:
        too_small = cv2.contourArea(contour) < Config.AREA_THRESHOLD
        if not too_small:
            # Contours under the area threshold are treated as noise and
            # never reach this call.
            visual(image, contour, pixels_per_metric, args['width'])
def text_detect_CRAFT(img, craft_config, CRAFT_MODEL, sortbb=True, visual_img=False):
    """Run CRAFT text detection on an already-loaded image.

    Args:
        img: image forwarded to ``craft_text_detect``.
        craft_config: CRAFT configuration.
        CRAFT_MODEL: CRAFT model.
        sortbb: when truthy, the detected boxes go through ``sort_bb``.
        visual_img: when truthy, ``visual`` is called with the image and the
            polygons; its result is not returned.

    Returns:
        ``(bboxes, polys, score_text)`` - text boxes, text polygons and the
        confidence score produced by ``craft_text_detect``.
    """
    boxes, polygons, score_map = craft_text_detect(img, craft_config, CRAFT_MODEL)

    if sortbb:
        boxes = sort_bb(boxes)

    if visual_img:
        # Called only for its side effect; the drawn image is discarded.
        visual(img, polygons)

    return boxes, polygons, score_map
def visualization(sess, model, data_loader, filename):
    """Restore the latest checkpoint and call ``visual`` once per sample.

    Returns early (after printing a failure message) when no checkpoint is
    found under ``flags.ckpt_dir``. Otherwise one batch is taken from
    ``data_loader.find_a_user()``, the four alpha tensors of ``model`` are
    evaluated with ``keep_prob`` set to 1.0, and each sample's slice is passed
    to ``visual`` together with ``filename``.
    """
    saver = tf.train.Saver(max_to_keep=1)
    checkpoint = tf.train.get_checkpoint_state(flags.ckpt_dir)
    if not (checkpoint and checkpoint.model_checkpoint_path):
        print(" [!] loading parameters failed ...")
        return
    saver.restore(sess, checkpoint.model_checkpoint_path)
    print(" [*] loading parameters success !!!")

    user, item, label, utexts, itexts, text = data_loader.find_a_user()
    # The text ids are used as integer indices into vec_texts further down.
    utexts = utexts.astype(int)
    itexts = itexts.astype(int)

    inputs = {
        model.u_input: user,
        model.i_input: item,
        model.label: label,
        model.utext: utexts,
        model.itext: itexts,
        model.text: text,
        model.keep_prob: 1.0,
    }
    fetches = [
        model.word_user_alpha,
        model.word_item_alpha,
        model.doc_user_alpha,
        model.doc_item_alpha,
    ]
    alphas = sess.run(fetches, feed_dict=inputs)
    print(utexts.dtype)

    user_docs = data_loader.vec_texts[utexts]
    item_docs = data_loader.vec_texts[itexts]

    # Swap the first two axes of the document-level alphas so that, like the
    # word-level ones, they can be indexed by sample below.
    for position in (2, 3):
        alphas[position] = np.array(alphas[position]).transpose(1, 0, 2)

    for idx in range(len(user)):
        triple = [user[idx], item[idx], label[idx]]
        print(triple)
        per_sample = [alpha[idx] for alpha in alphas]
        visual(per_sample, triple, data_loader, utexts[idx], itexts[idx],
               user_docs[idx], item_docs[idx], filename)
def cluster_image(img, colorspace, clusters, func, name):
    """Cluster the pixels of ``img`` with ``func`` and visualise the result.

    Args:
        img: input image.
        colorspace: conversion code passed to ``cv2.cvtColor``; ``None``
            leaves the image unconverted.
        clusters: second argument handed to ``func``.
        func: callable invoked as ``func(points, clusters)``; its return value
            is used as the labels.
        name: forwarded to ``utils.visual``.

    Returns:
        ``(res, labels, mask)`` where ``res`` and ``mask`` come from
        ``utils.visual``.
    """
    converted = img if colorspace is None else cv2.cvtColor(img, colorspace)
    pixel_points = utils.image_to_points(converted)
    assignments = func(pixel_points, clusters)
    rendered, mask = utils.visual(
        pixel_points, assignments, converted.shape, name, colorspace
    )
    return rendered, assignments, mask
def text_detect_CRAFT(image_path, craft_config, net_craft, sortbb=True, visual_img=True):
    """Load an image from disk and run CRAFT text detection on it.

    Args:
        image_path: path handed to ``loadImage``.
        craft_config: CRAFT configuration.
        net_craft: CRAFT network.
        sortbb: when truthy, the polygons go through ``sorting_bounding_box``.
        visual_img: when truthy, the returned image is the result of
            ``visual(img, polys)`` instead of the loaded image.

    Returns:
        ``(img, bboxes, polys, score_text)``.
    """
    image = loadImage(image_path)
    boxes, polygons, score_map = craft_text_detect(image, craft_config, net_craft)
    polygons = sorting_bounding_box(polygons) if sortbb else polygons
    image = visual(image, polygons) if visual_img else image
    return image, boxes, polygons, score_map
'''
Download the CRAFT weights from Drive and put them in the libs/CRAFT/models
folder.
Run this file after replacing the input image path; the output of this file is
the list of xmin, ymin, xmax, ymax of the bounding boxes, sorted from left to
right and from top to bottom.
'''


def loadImage(img_file):
    """Read an image file and normalise it to a 3-channel array.

    Args:
        img_file: path of the image to read with ``cv2.imread``.

    Returns:
        A fresh ``numpy`` array holding the image with exactly three channels.

    Raises:
        IOError: if ``cv2.imread`` could not read ``img_file`` (it signals
            failure by returning ``None`` instead of raising).
    """
    img = cv2.imread(img_file)
    if img is None:
        # Without this check the failure surfaces later as an opaque
        # "'NoneType' object has no attribute 'shape'".
        raise IOError('could not read image: {!r}'.format(img_file))

    # NOTE(review): cv2.imread yields BGR channel order, not RGB as the old
    # comment here claimed - confirm which order the CRAFT model expects.
    if img.shape[0] == 2:
        img = img[0]
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4:
        # Drop the fourth channel.
        img = img[:, :, :3]
    img = np.array(img)
    return img


# setup config
cfg = get_config()
cfg.merge_from_file('configs/craft.yaml')
craft_config = cfg.CRAFT

# run craft
net = CRAFT()
img = loadImage('data/test.jpg')
print('--------craft processing----------')
bboxes, polys, score_text = craft_text_detect(img, craft_config, net)
polys = sorting_bounding_box(polys)
# save the result image, just for a quick look
visual(img, polys)
print('--------craft done ----------')
def __call__(self, image, targets, input_dim):
    """Augment one training image and build its fixed-size target array.

    Args:
        image: input image; its ``shape`` is unpacked as
            ``(height, width, channels)``.
        targets: 2-D array with one row per object. Columns 0-3 are the box
            and column 4 the label; when there are more than five columns the
            last one is treated as a mixup ratio. The box arithmetic below
            treats columns 0/2 as the x extents and 1/3 as the y extents.
        input_dim: indexable pair; element 0 rescales the x-related box
            columns and element 1 the y-related ones. It is also forwarded to
            ``preproc_for_test``.

    Returns:
        ``(image_tensor, target_tensor)``, both float32 ``torch`` tensors.
        ``target_tensor`` has shape ``(self.max_labels, 5)``, or
        ``(self.max_labels, 6)`` with mixup. Rows are
        ``label, cx, cy, w, h[, ratio]``, zero-padded past the real objects.
    """
    boxes = targets[:, :4].copy()
    labels = targets[:, 4].copy()
    # A column beyond box + label means the mixup ratio is present.
    if targets.shape[1] > 5:
        mixup = True
        ratios = targets[:, -1].copy()
        ratios_o = targets[:, -1].copy()
    else:
        mixup = False
        ratios = None
        ratios_o = None
    # Number of columns in the output target rows.
    lshape = 6 if mixup else 5
    # No objects: skip augmentation and return an all-zero target array.
    if len(boxes) == 0:
        targets = np.zeros((self.max_labels, lshape), dtype=np.float32)
        image = preproc_for_test(image, input_dim, self.means, self.std)
        image = np.ascontiguousarray(image, dtype=np.float32)
        return torch.from_numpy(image), torch.from_numpy(targets)

    # Keep an un-augmented copy of the image and targets. It is the fallback
    # used further down when augmentation leaves no usable box.
    image_o = image.copy()
    targets_o = targets.copy()
    height_o, width_o, _ = image_o.shape
    # boxes_o / labels_o are slices of targets_o, so the in-place edits below
    # write into targets_o.
    boxes_o = targets_o[:, :4]
    labels_o = targets_o[:, 4]
    # Convert the fallback boxes from two corners to centre x, centre y,
    # width, height.
    b_x_o = (boxes_o[:, 2] + boxes_o[:, 0]) * 0.5
    b_y_o = (boxes_o[:, 3] + boxes_o[:, 1]) * 0.5
    b_w_o = (boxes_o[:, 2] - boxes_o[:, 0]) * 1.0
    b_h_o = (boxes_o[:, 3] - boxes_o[:, 1]) * 1.0
    boxes_o[:, 0] = b_x_o
    boxes_o[:, 1] = b_y_o
    boxes_o[:, 2] = b_w_o
    boxes_o[:, 3] = b_h_o
    # Normalise by the original image size, then rescale to input_dim.
    # NOTE(review): the in-place true division needs a float box dtype -
    # confirm callers never pass integer targets.
    boxes_o[:, 0::2] /= width_o
    boxes_o[:, 1::2] /= height_o
    boxes_o[:, 0::2] *= input_dim[0]
    boxes_o[:, 1::2] *= input_dim[1]
    # labels_o = np.expand_dims(labels_o,1)
    # targets_o = np.hstack((boxes_o,labels_o))
    # targets_o = np.hstack((labels_o,boxes_o))

    # Augmentation pipeline: distort -> expand -> crop -> mirror -> optional
    # random affine.
    image_t = _distort(image)
    # Fill value handed to _expand and _random_affine: the channel means
    # scaled by 255 and reversed, or mid-grey when no means are configured.
    # presumably the reversal converts RGB means to BGR - TODO confirm
    if self.means is not None:
        fill = [m * 255 for m in self.means]
        fill = fill[::-1]
    else:
        fill = (127.5, 127.5, 127.5)
    image_t, boxes = _expand(image_t, boxes, fill, self.p)
    image_t, boxes, labels, ratios = _crop(image_t, boxes, labels, ratios)
    image_t, boxes = _mirror(image_t, boxes)

    # randrange(2) is 0 or 1, so the affine step runs for about half the calls.
    # NOTE(review): labels / ratios are not updated by this step; this assumes
    # _random_affine returns the same number of boxes it was given - confirm.
    if random.randrange(2):
        image_t, boxes, _ = _random_affine(image_t, boxes, borderValue=fill)

    height, width, _ = image_t.shape

    # Debug aid: show the augmented image; waitKey(0) blocks until a key press.
    if DEBUG:
        image_t = np.ascontiguousarray(image_t, dtype=np.uint8)
        img = visual(image_t, boxes, labels)
        cv2.imshow("DEBUG", img)
        cv2.waitKey(0)

    image_t = preproc_for_test(image_t, input_dim, self.means, self.std)

    # Same corner -> centre/size conversion and rescaling as for the fallback
    # boxes, this time relative to the augmented image size.
    boxes = boxes.copy()
    b_x = (boxes[:, 2] + boxes[:, 0]) * 0.5
    b_y = (boxes[:, 3] + boxes[:, 1]) * 0.5
    b_w = (boxes[:, 2] - boxes[:, 0]) * 1.0
    b_h = (boxes[:, 3] - boxes[:, 1]) * 1.0
    boxes[:, 0] = b_x
    boxes[:, 1] = b_y
    boxes[:, 2] = b_w
    boxes[:, 3] = b_h

    boxes[:, 0::2] /= width
    boxes[:, 1::2] /= height
    boxes[:, 0::2] *= input_dim[0]
    boxes[:, 1::2] *= input_dim[1]

    # Keep only boxes whose shorter side (after rescaling) is larger than 6.
    mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 6
    # mask_b= (boxes[:,2]*boxes[:,3]) > 32**2
    # mask_b= (boxes[:,2]*boxes[:,3]) > 48**2
    boxes_t = boxes[mask_b]
    labels_t = labels[mask_b].copy()
    if mixup:
        ratios_t = ratios[mask_b].copy()

    # The bare string below is disabled code kept from an earlier version; it
    # is evaluated as a no-op expression.
    """ if len(boxes_t)==0: targets = np.zeros((self.max_labels,lshape),dtype=np.float32) image = preproc_for_test(image_o, input_dim, self.means, self.std) image = np.ascontiguousarray(image, dtype=np.float32) return torch.from_numpy(image), torch.from_numpy(targets) """
    # if len(boxes_t)==0 or random.random() > 0.97:
    # Augmentation removed every box: fall back to the un-augmented image and
    # the boxes / labels / ratios converted from it earlier.
    if len(boxes_t) == 0:
        image_t = preproc_for_test(image_o, input_dim, self.means, self.std)
        boxes_t = boxes_o
        labels_t = labels_o
        ratios_t = ratios_o

    # Assemble rows as label, box[, ratio].
    labels_t = np.expand_dims(labels_t, 1)
    if mixup:
        ratios_t = np.expand_dims(ratios_t, 1)
        targets_t = np.hstack((labels_t, boxes_t, ratios_t))
    else:
        targets_t = np.hstack((labels_t, boxes_t))
    # Zero-pad (or truncate) to exactly self.max_labels rows.
    padded_labels = np.zeros((self.max_labels, lshape))
    padded_labels[range(
        len(targets_t))[:self.max_labels]] = targets_t[:self.max_labels]
    padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
    image_t = np.ascontiguousarray(image_t, dtype=np.float32)
    return torch.from_numpy(image_t), torch.from_numpy(padded_labels)