def draw_bboxes_withindex(img, boxes, uids):
    """A helper function to draw bounding box rectangles on images.

    Args:
        img: image to be drawn on, in array format
        boxes: an (N, 4) array of bounding boxes (xmin, ymin, xmax, ymax)
        uids: a sequence of N ids, one drawn next to each box
    Output:
        PIL Image with the drawn bounding boxes
    """
    source = Image.fromarray(img)
    draw = ImageDraw.Draw(source)
    w2, h2 = (img.shape[0], img.shape[1])
    font = ImageFont.truetype(
        '/usr/share/fonts/truetype/freefont/FreeSerif.ttf', 40)
    # font = ImageFont.truetype('arial.ttf', 24)
    idx = 0
    for b in boxes:
        xmin, ymin, xmax, ymax = b
        # draw the rectangle three times with a 1-pixel offset to fake a thicker outline
        for j in range(3):
            draw.rectangle(((xmin + j, ymin + j), (xmax + j, ymax + j)),
                           outline="red")
        draw.text((xmin + 20, ymin + 70), str(uids[idx]), font=font)
        idx += 1
    return source
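# A minimal usage sketch for draw_bboxes_withindex (hypothetical data: the blank frame,
# box coordinates and ids below are made up for illustration; the FreeSerif font path
# used above must exist on the system).
example_img = np.zeros((480, 640, 3), dtype=np.uint8)                  # blank RGB frame
example_boxes = np.array([[50, 60, 200, 220], [300, 100, 420, 260]])   # (N, 4) xmin, ymin, xmax, ymax
example_uids = [101, 102]                                              # one id per box
annotated = draw_bboxes_withindex(example_img, example_boxes, example_uids)
annotated.save('bboxes_with_ids.png')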
def generate_position_image(element, position_converter):
    img = Image.new("RGB", (100, 100), "white")
    if isinstance(element, E):
        Ep = element.value
        elements = Ep.strip(" ()").split("+")
    elif isinstance(element, str):
        if element == "all":
            elements = position_converter.keys()
        else:
            Ep = element
            elements = Ep.strip(" ()").split("+")
    else:
        return img
    draw = ImageDraw.Draw(img)
    for el in elements:
        x_start, y_start, x_end, y_end = position_converter[el]
        draw.rectangle(((x_start, y_start), (x_end, y_end)),
                       fill="gold", outline=True, width=1)
        draw.text((x_start, y_start), el, fill="black")
    draw.rectangle(((0, 0), (100 - 1, 100 - 1)), outline=True, width=1)
    return img
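# A small usage sketch for generate_position_image (hypothetical values: the element
# names and coordinates below are made up, and must fit the fixed 100x100 canvas).
# position_converter maps an element name to (x_start, y_start, x_end, y_end).
example_converter = {
    "A": (5, 5, 45, 45),
    "B": (55, 5, 95, 45),
    "C": (5, 55, 95, 95),
}
img_all = generate_position_image("all", example_converter)   # highlight every element
img_ab = generate_position_image("(A+B)", example_converter)  # highlight a "+"-joined subset
img_ab.save("positions_ab.png")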
def puttext(self, cv_image, texts, point=(30, 30),
            font_path='/IPAexfont00401/ipaexm.ttf', font_size=100,
            color=(255, 0, 0)):
    h, w, _ = cv_image.shape  # was `img.shape`, but the parameter is named cv_image
    font_path = './font' + font_path
    font = ImageFont.truetype(font_path, font_size)
    # OpenCV images are BGR; convert to RGB before handing them to Pillow
    cv_rgb_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(cv_rgb_image)
    draw = ImageDraw.Draw(pil_image)
    dx = 0
    dy = (h // 4) // len(texts)  # vertical spacing between lines of text
    for i, text in enumerate(texts):
        repoint = (point[0] + dx, point[1] + i * dy)
        draw.text(repoint, text, fill=color, font=font)
    # convert back to a BGR numpy array for OpenCV
    cv_rgb_result_image = np.asarray(pil_image)
    cv_bgr_result_image = cv2.cvtColor(cv_rgb_result_image, cv2.COLOR_RGB2BGR)
    return cv_bgr_result_image
def _sample():
    x = np.random.randint(width, size=4).tolist()
    y = np.random.randint(height, size=4).tolist()
    x.sort()
    y.sort()
    # sample intersected bboxes
    fake = Faker()
    if fake.pybool():
        if fake.pybool():
            ax0, bx0, ax1, bx1 = x
        else:
            bx0, ax0, bx1, ax1 = x
        if fake.pybool():
            ay0, by0, ay1, by1 = y
        else:
            by0, ay0, by1, ay1 = y
    else:
        ax0, bx0, bx1, ax1 = x
        ay0, by0, by1, ay1 = y
    bboxes = [(ax0, ay0, ax1, ay1), (bx0, by0, bx1, by1)]
    # sample layers
    layers = [0, 3]
    shuffle(layers)
    im = Image.new("RGB", (width, width))
    draw = ImageDraw.Draw(im)
    im_t = [np.array(im)]
    for layer, bbox in zip(layers, bboxes):
        x0, y0, x1, y1 = bbox
        if layer == 4:
            draw.text((x0, y0), fake.sentence(), fill=fake.hex_color())
        elif layer == 3:
            f = os.path.join(photo_folder,
                             files[fake.pyint(min=0, max=len(files) - 1)])
            _im = Image.open(f).resize((x1 - x0, y1 - y0))
            im.paste(_im, box=(x0, y0))
        elif layer == 0:
            draw.rectangle((x0, y0, x1, y1), fill=fake.hex_color())
        im_t.append(np.array(im))
    # sample final layer: text
    x0 = np.random.randint(width / 2)
    y0 = np.random.randint(height / 2)
    text = fake.sentence()
    draw.text((x0, y0), text, fill=fake.hex_color())
    w, h = draw.textsize(text)
    layers.append(4)
    bboxes.append((x0, y0, w, h))
    ims = np.stack(
        [np.concatenate([x, np.array(im)], axis=2) for x in im_t])
    return ims, np.array(layers), np.array(bboxes)
def __getitem__(self, idx):
    sample = self.dataset[self.imageids[idx]]
    seg = self.seg[None, :, :]
    # Convert to properly-sized tensors
    img = self.convert(sample["img"][0])
    # Decide whether to draw the distractor character on this sample
    draw_distractor = False
    if self.mode == "train":
        if self.labels[idx] == 1 and np.random.rand() < self.prob:
            draw_distractor = True
    else:
        if self.labels[idx] == 0 and np.random.rand() < self.prob:
            draw_distractor = True
    if draw_distractor:
        draw = ImageDraw.Draw(img)
        font = ImageFont.load_default()  # .truetype("sans-serif.ttf", 16)
        draw.text((np.random.randint(5) + 10, np.random.randint(5) + 10),
                  "R", np.random.randint(10) + 245)
    # Enforces that the image is a square.
    if self.new_size != img.width == img.height:
        img = self.resize(img)
    # Enforce datatype
    img = TF.to_tensor(img).float()
    seg = TF.to_tensor(seg).permute([1, 0, 2]).float()
    if self.mask_all:
        try:
            img *= seg
        except Exception:
            import IPython
            IPython.embed()
    return (img, seg, self.labels[idx])  # self.masks_selector[idx]
def center_point_splash(image, mask, output_path=None):
    draw = ImageDraw.Draw(image)
    if mask.shape[-1] > 0:
        # Number each instance at the center of its mask
        font = ImageFont.truetype('simsun.ttc', 40)
        shape = mask.shape
        dim = shape[2]
        count = 1
        for i in range(dim):
            mask1 = mask[:, :, i]
            mask1 = mask1 + 0
            gray = np.array(mask1, dtype='uint8')
            kernel = np.ones((20, 20), np.uint8)
            erosion = cv.erode(gray, kernel)  # erode the mask to shrink it
            # OpenCV 3.x signature: findContours returns (image, contours, hierarchy)
            im, contours, hierarchy = cv.findContours(erosion, cv.RETR_LIST,
                                                      cv.CHAIN_APPROX_SIMPLE)
            if len(contours):
                cnt = contours[0]
                M = cv.moments(cnt)
                # print(M)
                cx = int(M['m10'] / (M['m00'] + 1))
                cy = int(M['m01'] / (M['m00'] + 1))
                """
                if mask1[cy, cx] == 0:
                    num = 1
                    avg = 0
                    for j in range(shape[0]):
                        if mask1[cy, j] == 1:
                            avg += j
                            num += 1
                    cx = round(avg/num)
                """
                draw.text((cx, cy), str(count), fill=(255, 0, 0), font=font)
                count += 1
    image.save(output_path, 'jpeg')
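# Usage sketch for center_point_splash (hypothetical data: a single square instance on a
# blank image; assumes OpenCV 3.x for the three-value cv.findContours return used above
# and that the 'simsun.ttc' font is available).
pil_img = Image.new('RGB', (200, 200), 'white')
fake_mask = np.zeros((200, 200, 1), dtype=np.uint8)
fake_mask[60:140, 60:140, 0] = 1   # one 80x80 square instance
center_point_splash(pil_img, fake_mask, output_path='numbered_instances.jpg')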
def draw_text(image, text_message):
    # get a plain foreground layer filled with yellow
    foreground = Image.new('RGB', (image.shape[1], image.shape[0]), (128, 128, 0, 128))
    background = Image.fromarray(image[:, :, :])
    mask = Image.new('L', (image.shape[1], image.shape[0]), 255)
    draw = ImageDraw.Draw(mask)
    fnt = ImageFont.truetype("Pillow/Tests/fonts/FreeMono.ttf",
                             round(120 * scale_video))
    w, h = draw.textsize(text_message, font=fnt)
    W, H = mask.size
    # draw the subtitle at the bottom of the frame, aligned to the right, in an alpha
    # layer, with alpha = 200/255
    draw.text(((W - w) - round(30 * scale_video), H - round(150 * scale_video)),
              text_message, fill=(200), font=fnt)
    # draw foreground on background respecting the alpha layer
    result = Image.composite(background, foreground, mask)
    return np.array(result)
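# Usage sketch for draw_text (hypothetical frame; scale_video is a module-level scale
# factor assumed to be defined elsewhere, set to 1.0 here only for the example, and the
# Pillow test-font path used above must exist).
scale_video = 1.0
frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # blank video frame (H, W, 3)
subtitled = draw_text(frame, "example subtitle")   # numpy array with the subtitle composited in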
def main(): args = get_args() if args.resume is None: raise ValueError('Must provide --resume when testing.') support_architectures = [ 'ksevendet', ] support_architectures += [f'efficientdet-d{i}' for i in range(8)] support_architectures += [ f'retinanet-res{i}' for i in [18, 34, 50, 101, 152] ] support_architectures.append('retinanet-p45p6') print(support_architectures) if args.architecture == 'ksevendet': ksevendet_cfg = args.model_cfg if ksevendet_cfg.get('variant'): network_name = f'{args.architecture}-{ksevendet_cfg["variant"]}-{ksevendet_cfg["neck"]}' else: assert 0, 'not support now.' assert isinstance(ksevendet_cfg, dict) network_name = f'{args.architecture}-{ksevendet_cfg["backbone"]}_specifical-{ksevendet_cfg["neck"]}' elif args.architecture in support_architectures: network_name = args.architecture else: raise ValueError('Architecture {} is not support.'.format( args.architecture)) args.network_name = network_name net_logger = get_logger(name='Network Logger', args=args) net_logger.info('Positive Threshold: {:.2f}'.format(args.threshold)) _shape_1, _shape_2 = tuple(map(int, args.input_shape.split(','))) _normalizer = Normalizer(inference_mode=True) if args.resize_mode == 0: _resizer = Resizer(min_side=_shape_1, max_side=_shape_2, resize_mode=args.resize_mode, logger=net_logger, inference_mode=True) elif args.resize_mode == 1: _resizer = Resizer(height=_shape_1, width=_shape_2, resize_mode=args.resize_mode, logger=net_logger, inference_mode=True) else: raise ValueError('Illegal resize mode.') transfrom_funcs_valid = [ _normalizer, _resizer, ] transform = transforms.Compose(transfrom_funcs_valid) net_logger.info('Number of Classes: {:>3}'.format(args.num_classes)) build_param = {'logger': net_logger} if args.architecture == 'ksevendet': net_model = ksevendet.KSevenDet(ksevendet_cfg, num_classes=args.num_classes, pretrained=False, **build_param) elif args.architecture == 'retinanet-p45p6': net_model = retinanet.retinanet_p45p6(num_classes=args.num_classes, **build_param) elif args.architecture.split('-')[0] == 'retinanet': net_model = retinanet.build_retinanet(args.architecture, num_classes=args.num_classes, pretrained=False, **build_param) elif args.architecture.split('-')[0] == 'efficientdet': net_model = efficientdet.build_efficientdet( args.architecture, num_classes=args.num_classes, pretrained=False, **build_param) else: assert 0, 'architecture error' net_logger.info('Loading Weights from Checkpoint : {}'.format(args.resume)) net_model.load_state_dict(torch.load(args.resume)) #model = torch.load(args.resume) use_gpu = True if use_gpu: if torch.cuda.is_available(): net_model = net_model.cuda() if torch.cuda.is_available(): net_model = torch.nn.DataParallel(net_model).cuda() else: net_model = torch.nn.DataParallel(net_model) #net_model.eval() net_model.module.eval() img_array = [] cap = cv2.VideoCapture(args.input_path) fontsize = 12 score_font = ImageFont.truetype("DejaVuSans.ttf", size=fontsize) cap_i = 0 while (cap.isOpened()): ret, frame = cap.read() if ret == False: break #if cap_i > 20: # break #img = skimage.io.imread(os.path.join(args.demo_path, f)) #if len(img.shape) == 2: # img = skimage.color.gray2rgb(img) a_img = np.copy(frame) img = Image.fromarray(np.uint8(frame)) a_img = a_img.astype(np.float32) / 255.0 a_img = transform(a_img) a_img = torch.unsqueeze(a_img, 0) a_img = a_img.permute(0, 3, 1, 2) # print('predict...') scores, labels, boxes = net_model(a_img, return_loss=False) scores = scores.cpu() labels = labels.cpu() boxes = boxes.cpu() # change to (x, y, w, h) 
(MS COCO standard) boxes[:, 2] -= boxes[:, 0] boxes[:, 3] -= boxes[:, 1] print(f'{cap_i} inference ...', end="\r") draw = ImageDraw.Draw(img) for box_id in range(boxes.shape[0]): score = float(scores[box_id]) label = int(labels[box_id]) box = boxes[box_id, :] # scores are sorted, so we can break if score < args.threshold: break x, y, w, h = box color_ = COLOR_LABEL[label] _text_offset_x, _text_offset_y = 2, 3 draw.rectangle(tuple([x, y, x + w, y + h]), width=1, outline=color_) draw.text(tuple( [int(x) + _text_offset_x + 1, int(y) + _text_offset_y]), '{:.3f}'.format(score), fill='#000000', font=score_font) draw.text(tuple([int(x) + _text_offset_x, int(y) + _text_offset_y]), '{:.3f}'.format(score), fill=color_, font=score_font) img_array.append(np.asarray(img)) cap_i += 1 cap.release() height, width, layers = img_array[0].shape size = (width, height) fps = 30 #fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') input_video_name = os.path.basename(args.input_path) input_video_dir = os.path.dirname(args.input_path) out_video_path = os.path.join( 'trash', '{}_{}_thr{}.avi'.format( input_video_name[:-4], network_name if not args.model_name else args.model_name, int(args.threshold * 100))) print('Convert to video... {}'.format(out_video_path)) out = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size) for i in range(len(img_array)): out.write(img_array[i]) out.release() print('Done')
def load_verify_contour(data_path, phase='train'): data_path = os.path.join(data_path, phase) annotation_path = os.path.join(data_path, 'annotations') rgb_path = os.path.join(data_path, 'images') updated_mask = os.path.join(data_path, 'masks') cache_path = os.path.join(data_path, 'cache') verify_path = os.path.join(data_path, 'verify') # verify the extracted contour and bounding box, image saved in "verify" do_verification = False ground_truth_cache = os.path.join(cache_path, 'ground_truth_cache.pkl') if os.path.isfile(ground_truth_cache): print('Loading gt_labels from: ' + ground_truth_cache) with open(ground_truth_cache, 'rb') as f: gt_data = cPickle.load(f) return gt_data f_wrect = open(os.path.join(cache_path, phase + '.txt'), 'w') # creat image ID text annotations = [] imgfile = os.listdir(rgb_path) error_mask = 0 for i in range(len(imgfile)): file = imgfile[i] filename = os.path.splitext(file)[0] print(filename) f_wrect.write(filename + '\n') #Load image, load bounding box info from XML file in PASCAL VOC format annoname = os.path.join(annotation_path, filename + '.xml') if os.path.exists(annoname): objects = [] tree = ET.parse(annoname) objs = tree.findall('object') for obj in objs: obj_struct = {} cls_name = obj.find('name').text.lower().strip() obj_struct['class'] = cls_name bbox = obj.find('bndbox') x1 = float(bbox.find('xmin').text) - 1 y1 = float(bbox.find('ymin').text) - 1 x2 = float(bbox.find('xmax').text) - 1 y2 = float(bbox.find('ymax').text) - 1 obj_struct['bbox'] = [x1, y1, x2, y2] objects.append(obj_struct) # extract 'merge' box in list[[x1, y1, x2, y2],[x1, y1, x2, y2]...] object_merge = [ obj['bbox'] for obj in objects if obj['class'] == 'merge' ] rgb_file = os.path.join(rgb_path, filename + '.jpg') spallmask_file = os.path.join(updated_mask, filename + 'spall' + '.jpg') rebarmask_file = os.path.join(updated_mask, filename + 'rebar' + '.jpg') crackmask_file = os.path.join(updated_mask, filename + 'crack' + '.jpg') # load contours from mask file spall_contours = [] rebar_contours = [] crack_contours = [] if os.path.exists(rebarmask_file): img_binary = cv2.imread(rebarmask_file, cv2.IMREAD_GRAYSCALE) ret, rebarthresh = cv2.threshold(img_binary, 127, 255, 0) im2, rebar_contours, rebar_hierarchy = cv2.findContours( rebarthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) if os.path.exists(spallmask_file): img_binary = cv2.imread(spallmask_file, cv2.IMREAD_GRAYSCALE) ret, spallthresh = cv2.threshold(img_binary, 127, 255, 0) im2, spall_contours, spall_hierarchy = cv2.findContours( spallthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) if os.path.exists(crackmask_file): img_binary = cv2.imread(crackmask_file, cv2.IMREAD_GRAYSCALE) ret, crackthresh = cv2.threshold(img_binary, 127, 255, 0) im2, crack_contours, crack_hierarchy = cv2.findContours( crackthresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) # generate contour cache regions = {} count = 0 pair = {} if not crack_contours == []: classname = 'crack' for j in range(len(crack_contours)): shape_groups = [] one_contour = crack_contours[j][:, 0, :] all_x = np.array(one_contour[:, 0]).tolist() all_y = np.array(one_contour[:, 1]).tolist() if crack_hierarchy[0, j, 3] == -1: shape_groups.append({ 'type': 'out', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(count)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } pair[str(j)] = count count = count + 1 else: indexvalue = crack_hierarchy[0, j, 3] if not crack_hierarchy[0, indexvalue, 3] == -1: print('There may be errors in mask ' + filename + 'crack' + '.jpg') 
error_mask = error_mask + 1 continue index = pair[str(indexvalue)] shape_groups = regions[str(index)]['shape_attributes'] shape_groups.append({ 'type': 'in', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(index)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } pair = {} if not spall_contours == []: classname = 'spall' for j in range(len(spall_contours)): shape_groups = [] one_contour = spall_contours[j][:, 0, :] all_x = np.array(one_contour[:, 0]).tolist() all_y = np.array(one_contour[:, 1]).tolist() # check if the contour is inside another and thus [:,:,3]parent is not ==-1 if spall_hierarchy[0, j, 3] == -1: shape_groups.append({ 'type': 'out', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(count)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } pair[str(j)] = count count = count + 1 else: indexvalue = spall_hierarchy[0, j, 3] if not spall_hierarchy[ 0, indexvalue, 3] == -1: # second inside defect masks, usually should not happen print('There may be errors in mask ' + filename + 'spall' + '.jpg') error_mask = error_mask + 1 continue index = pair[str( indexvalue)] # find the count of the parent contour shape_groups = regions[str(index)]['shape_attributes'] shape_groups.append({ 'type': 'in', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(index)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } pair = {} if not rebar_contours == []: classname = 'rebar' for j in range(len(rebar_contours)): shape_groups = [] one_contour = rebar_contours[j][:, 0, :] all_x = np.array(one_contour[:, 0]).tolist() all_y = np.array(one_contour[:, 1]).tolist() if rebar_hierarchy[0, j, 3] == -1: shape_groups.append({ 'type': 'out', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(count)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } pair[str(j)] = count count = count + 1 else: indexvalue = rebar_hierarchy[0, j, 3] if not rebar_hierarchy[0, indexvalue, 3] == -1: print('There may be errors in mask ' + filename + 'rebar' + '.jpg') error_mask = error_mask + 1 continue else: index = pair[str(indexvalue)] shape_groups = regions[str(index)]['shape_attributes'] shape_groups.append({ 'type': 'in', 'all_points_x': all_x, 'all_points_y': all_y }) regions[str(index)] = { 'region_attributes': classname, 'shape_attributes': shape_groups } # merge instances acording to "object_merge" if os.path.exists(annoname): merge_groups = {} name_list = {} for jj in range(len(object_merge)): merge_groups[str(jj)] = [] name_list[str(jj)] = [] # assign each instance to merge_groups instance_num = len(regions) for k in range(instance_num): one_region = regions[str(k)] polygons = one_region['shape_attributes'] classname = one_region['region_attributes'] check = 0 old_center_dis = 4000 polygon = polygons[0] # only need consider the outmost contour all_x1 = polygon['all_points_x'] all_y1 = polygon['all_points_y'] rr, cc = skimage.draw.polygon(all_y1, all_x1) all_p1 = np.column_stack([np.array(all_x1), np.array(all_y1)]) contour = np.expand_dims(all_p1, axis=1) M = cv2.moments(contour) cX = int(M["m10"] / M["m00"]) cY = int(M["m01"] / M["m00"]) for ii in range(len(object_merge)): [x1, y1, x2, y2] = object_merge[ii] if cX <= x1 or cX >= x2 or cY <= y1 or cY >= y2: continue center_disx = (x1 + x2) / 2 - cX center_dixy = (y1 + y2) / 2 - cY new_center_dis = (center_disx**2 + center_dixy**2)**0.5 if new_center_dis < old_center_dis: dis_index = ii old_center_dis = new_center_dis if (ii + 1) == len(object_merge): 
[x1, y1, x2, y2] = object_merge[dis_index] if cX >= x1 and cX <= x2 and cY >= y1 and cY <= y2: merge_groups[str(dis_index)].extend(polygons) name_list[str(dis_index)].extend([classname]) check = 1 if not check == 1: print('No merged box belongs to the defect in ' + file) # update "regions" new_regions = {} count = 0 for jj in range(len(object_merge)): if merge_groups[str(jj)] == []: print('No defect belongs to this merged box ' + file) else: # determine the class name for this merge box: [crack, spall, rebar] or [crack, spall] namelist = name_list[str(jj)] if 'crack' in namelist: classname = 'crack' elif 'spall' in namelist and 'rebar' not in namelist: classname = 'spall' elif 'rebar' in namelist: classname = 'rebar' new_regions[str(count)] = { 'region_attributes': classname, 'shape_attributes': merge_groups[str(jj)] } count = count + 1 damage_bgr = cv2.imread(rgb_file) # read and save in BGR mode height, width, _ = damage_bgr.shape if os.path.exists(annoname): copy_regions = new_regions else: copy_regions = regions # save in annotations list annotations.append({ 'filename': file, 'regions': copy_regions, 'size': [height, width] }) # verify the annotation for each image if do_verification: damage_rgb = cv2.cvtColor(damage_bgr, cv2.COLOR_BGR2RGB) instance_num = len(copy_regions) boxes = np.zeros([instance_num, 4], dtype=np.int32) boxes_name = [] instance_mask = [] for k in range(instance_num): one_region = copy_regions[str(k)] class_name = one_region['region_attributes'] polygons = one_region['shape_attributes'] each_mask = np.zeros([height, width], dtype=np.bool) for each_poly in polygons: subtype = each_poly['type'] x_points = each_poly['all_points_x'] y_points = each_poly['all_points_y'] rr, cc = skimage.draw.polygon(y_points, x_points) if subtype == 'out': each_mask[rr, cc] = True each_mask[np.array(y_points), np.array(x_points)] = True else: each_mask[ rr, cc] = False # remove the inside background region each_mask[np.array(y_points), np.array(x_points)] = True instance_mask.append(each_mask) # extract the box from mask y1, x1, y2, x2 = extract_bboxes(each_mask) boxes[k] = np.array([y1, x1, y2, x2]).astype(np.int32) boxes_name.append(class_name) # creat merged new mask for each class of each image crack_mask = np.zeros([height, width], dtype=np.uint8) spall_mask = np.zeros([height, width], dtype=np.uint8) rebar_mask = np.zeros([height, width], dtype=np.uint8) for k in range(instance_num): defectname = boxes_name[k] defectmask = instance_mask[k] defectmask = (defectmask * 255).astype(np.uint8) if defectname == 'crack': crack_mask = np.where(defectmask == 255, 255, crack_mask) elif defectname == 'spall': spall_mask = np.where(defectmask == 255, 255, spall_mask) elif defectname == 'rebar': rebar_mask = np.where(defectmask == 255, 255, rebar_mask) # plot masks on original image if np.max(crack_mask) == 255: color = [255, 255, 0] # yellow for c in range(3): damage_rgb[:, :, c] = np.where( crack_mask == 255, damage_rgb[:, :, c] * 0.8 + 0.2 * color[c], damage_rgb[:, :, c]) if np.max(spall_mask) == 255: color = [0, 255, 255] # Cyan for c in range(3): damage_rgb[:, :, c] = np.where( spall_mask == 255, damage_rgb[:, :, c] * 0.85 + 0.15 * color[c], damage_rgb[:, :, c]) if np.max(rebar_mask) == 255: color = [255, 0, 255] # Magenta for c in range(3): damage_rgb[:, :, c] = np.where( rebar_mask == 255, damage_rgb[:, :, c] * 0.8 + 0.2 * color[c], damage_rgb[:, :, c]) # draw bounding boxes img_draw = Image.fromarray(damage_rgb) draw = ImageDraw.Draw(img_draw) font = 
ImageFont.truetype(font='fonttype/FiraMono-Medium.otf', size=int(0.02 * height)) for j in range(instance_num): y1, x1, y2, x2 = boxes[j, :] draw.line([x1, y1, x1, y2], fill=(255, 0, 0), width=2) draw.line([x2, y1, x2, y2], fill=(255, 0, 0), width=2) draw.line([x1, y1, x2, y1], fill=(255, 0, 0), width=2) draw.line([x1, y2, x2, y2], fill=(255, 0, 0), width=2) text_str = str(j) + ' ' + boxes_name[j] draw.text(np.array([x1, y1]), text_str, font=font, fill=(0, 0, 255)) del draw imagedir = os.path.join(verify_path, filename + '.jpg') img_draw.save(imagedir) print('Number of error mask is ' + str(error_mask)) print('Saving gt_labels to: ' + ground_truth_cache) with open(ground_truth_cache, 'wb') as f: cPickle.dump(annotations, f) return annotations
def display_instances(image, boxes, masks, class_ids, class_names, scores,
                      image_name, save_dir, title="", figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True, colors=None, captions=None):
    """
    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    title: (optional) Figure title
    show_mask, show_bbox: To show masks and bounding boxes or not
    figsize: (optional) the size of the image
    colors: (optional) An array of colors to use with each object
    captions: (optional) A list of strings to use as captions for each object
    """
    N = boxes.shape[0]
    colors = colors or random_colors(N)
    if not N:
        print("\n*** No instances in image %s to draw *** \n" % (image_name))
        masked_image = image.astype(np.uint8).copy()
        cv2.imwrite(os.path.join(save_dir, '%s' % (image_name)), masked_image)
        return
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
    useful_mask_indices = []
    for i in range(N):
        # Generate random colors
        colors = colors or random_colors(N)
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        useful_mask_indices.append(i)
    masked_image = image.astype(np.uint8).copy()
    for index, value in enumerate(useful_mask_indices):
        class_id = class_ids[value]
        label = class_names[class_id]
        # Skip hand and mouth masking
        if (label == 'hand') or (label == 'mouth'):
            pass
        else:
            masked_image = apply_mask(masked_image, masks[:, :, value], colors[index])
    masked_image = Image.fromarray(masked_image)
    draw = ImageDraw.Draw(masked_image)
    colors = np.array(colors).astype(int) * 255
    for index, value in enumerate(useful_mask_indices):
        class_id = class_ids[value]
        score = scores[value]
        label = class_names[class_id]
        # object timeline
        if label in sec_object:
            pass
        else:
            sec_object.append(label)
        # object result count
        if label in count_obj:
            count_obj[label] += 1
        else:
            count_obj[label] = 1
        # hand and mouth are skipped; all other labels are drawn
        if (label == 'hand') or (label == 'mouth'):
            pass
        else:
            y1, x1, y2, x2 = boxes[value]
            color = tuple(colors[index])
            draw.rectangle((x1, y1, x2, y2), outline=color)
            # Label
            # font = ImageFont.truetype('/Library/Fonts/Arial.ttf', 15)
            draw.text((x1, y1), "%s %f" % (label, score), (255, 255, 255))
    masked_image.save(os.path.join(save_dir, '%s' % (image_name)))
def generate_text_data(width=64, n_sample=1000, n_strokes=1):
    print("Generating datasets...")
    fake = Faker()
    width = 256
    space_x, space_y = 4, 0  # in pixels
    im = Image.new("RGB", (width, width))
    draw = ImageDraw.Draw(im)
    x, y, w, h = 30, 50, 0, 0
    _y = y
    for i in range(fake.pyint(min=1, max=5)):
        dx = 0
        for j in range(fake.pyint(min=1, max=5)):
            word = fake.word()
            draw.text((x + dx, _y), word, fill=(255, 255, 255))
            _w, _h = draw.textsize(word)  # size of token
            dx += _w + space_x
        w = dx - space_x if dx - space_x > w else w
        _y += _h + space_y
    h = _y - y
    draw.rectangle([x, y, x + w, y + h])
    tokens = fake.sentence().split()

    # note: `im` is reassigned below, discarding the text canvas drawn above
    label, bbox, im = [], [], []
    for _ in range(n_sample):
        _im, _labels = skimage.draw.random_shapes((64, 64), min_shapes=1,
                                                  max_shapes=n_strokes, min_size=10)
        _label, ((r0, r1), (c0, c1)) = _labels[0]
        _class = LABEL_CLASS[_label]
        if r0 < r1:
            y0, y1 = r0, r1
            x0, x1 = c0, c1
        else:
            y0, y1 = r1, r0
            x0, x1 = c1, c0
        if x0 > x1 or y0 > y1:
            print((r0, r1), (c0, c1))
        label.append(np.array((_class), dtype="uint8"))
        bbox.append(np.array((x0, y0, x1, y1), dtype="uint8"))
        im.append(_im)
    label = np.stack(label)
    bbox = np.stack(bbox)  # (N, 4=(x0, y0, x1, y1))
    im = np.stack(im).transpose(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
    indices = np.arange(0, len(label), dtype="int32")
    train, test = train_test_split(indices, test_size=0.2, random_state=0)
    if not os.path.exists("data-multi-shape/"):
        os.makedirs("data-multi-shape/")
    np.save("data-multi-shape/train_label.npy", label[train])
    np.save("data-multi-shape/train_bbox.npy", bbox[train])
    np.save("data-multi-shape/train_images.npy", im[train])
    np.save("data-multi-shape/test_label.npy", label[test])  # was label[train]
    np.save("data-multi-shape/test_bbox.npy", bbox[test])    # was bbox[train]
    np.save("data-multi-shape/test_images.npy", im[test])
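# Quick check of the arrays written above (hypothetical call; generate_text_data relies
# on the module-level LABEL_CLASS mapping and the Faker / skimage imports, and the paths
# match the np.save calls in the function).
generate_text_data(n_sample=100)
train_images = np.load("data-multi-shape/train_images.npy")
train_bbox = np.load("data-multi-shape/train_bbox.npy")
print(train_images.shape, train_bbox.shape)   # e.g. (80, 3, 64, 64) and (80, 4)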
def detect_and_color_splash(model, image_path=None, video_path=None, save_path=None):
    assert image_path or video_path
    num_spikes = []
    pixel_count = []
    spike_height = []
    spike_width = []
    center_mask = []
    # Image or video?
    if image_path:
        # Run model detection and generate the color splash effect
        print("Running on {}".format(image_path))
        # Read image
        image = skimage.io.imread(image_path)
        # Detect objects
        r = model.detect([image], verbose=1)[0]
        # Color splash
        splash = color_splash(image, r['masks'])
        print(type(r['masks']))
        file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now())
        bbInformationName = os.path.join(save_path, file_name[0:-3] + 'txt')
        # save bb information
        with open(bbInformationName, 'w') as file:
            # <class_name> <left> <top> <right> <bottom> [<difficult>]
            # bb information top left bottom right
            for each_roi, each_score in zip(r['rois'], r['scores']):
                file.write(f'spike {each_score}')
                file.write(f' {each_roi[1]} {each_roi[0]} {each_roi[3]} {each_roi[2]}')
                file.write('\n')
        # draw bb
        spike_cnt = 0  # number of spikes
        for eachBB in r['rois']:
            splash = drawBoundingBox(eachBB, splash)
            spike_cnt += 1
            spike_height.append(eachBB[2] - eachBB[0])
            spike_width.append(eachBB[3] - eachBB[1])
        for _ in range(len(spike_height)):
            num_spikes.append(spike_cnt)
        # draw center of mask by k mean
        for eachBB, maskIndex in zip(r['rois'], range(0, len(r['rois']))):
            topBotList = []
            leftRightList = []
            maskCenter = {}
            for topBot in range(eachBB[0], eachBB[2]):
                for leftRight in range(eachBB[1], eachBB[3]):
                    if r['masks'][topBot][leftRight][maskIndex]:
                        topBotList.append(topBot)
                        leftRightList.append(leftRight)
            maskCenter[maskIndex] = (sum(topBotList) // len(topBotList),
                                     sum(leftRightList) // len(leftRightList))
            center_mask.append(f'({maskCenter[maskIndex][0]}, {maskCenter[maskIndex][1]})')
            splash = drawCenterMask(maskCenter[maskIndex], splash)
        # write confidence level
        pilImage = Image.fromarray(splash, 'RGB')
        # fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 40)
        draw = ImageDraw.Draw(pilImage)
        for eachBB, eachText in zip(r['rois'], r['scores']):
            draw.text((eachBB[1], eachBB[0]), '{:3f}'.format(eachText),
                      fill=(255, 255, 255, 255))
        # write number of pixels
        for eachBB, maskIndex in zip(r['rois'], range(0, len(r['rois']))):
            pixelSum = 0
            topBotList = []
            leftRightList = []
            maskCenter = {}
            for topBot in range(eachBB[0], eachBB[2]):
                for leftRight in range(eachBB[1], eachBB[3]):
                    if r['masks'][topBot][leftRight][maskIndex]:
                        pixelSum += 1
                        topBotList.append(topBot)
                        leftRightList.append(leftRight)
            pixel_count.append(pixelSum)
            draw.text((eachBB[1], eachBB[2]), '{}'.format(pixelSum),
                      fill=(255, 255, 255, 255))
        # Save output
        pilImage.save(os.path.join(save_path, file_name))
        # skimage.io.imsave(file_name, splash)
    elif video_path:
        import cv2
        # Video capture
        vcapture = cv2.VideoCapture(video_path)
        width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = vcapture.get(cv2.CAP_PROP_FPS)
        # Define codec and create video writer
        file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now())
        vwriter = cv2.VideoWriter(file_name, cv2.VideoWriter_fourcc(*'MJPG'),
                                  fps, (width, height))
        count = 0
        success = True
        while success:
            print("frame: ", count)
            # Read next image
            success, image = vcapture.read()
            if success:
                # OpenCV returns images as BGR, convert to RGB
                image = image[..., ::-1]
                # Detect objects
                r = model.detect([image], verbose=0)[0]
                # Color splash
                splash = color_splash(image, r['masks'])
                # RGB -> BGR to save image to video
                splash = splash[..., ::-1]
                # Add image to video writer
                vwriter.write(splash)
                count += 1
        vwriter.release()
    print("Saved to ", file_name)
    return num_spikes, pixel_count, spike_height, spike_width, center_mask
import pytesseract
from pytesseract import Output
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
from skimage import color
import cv2
import numpy as np                 # needed for np.shape below
import matplotlib.pyplot as plt    # needed for the figure/imshow calls below

img = Image.open("desktop/base_img.png")
imgF = Image.open("desktop/edit_img.png")
# imgg = color.xyz2rgb(img)
draw = ImageDraw.Draw(imgF)
font = ImageFont.truetype("Desktop/Roboto-Light.ttf", 20)

d = pytesseract.image_to_data(img, output_type=Output.DICT)
d2 = pytesseract.image_to_data(imgF, output_type=Output.DICT)
n_boxes = len(d2['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d2['left'][i], d2['top'][i], d2['width'][i], d2['height'][i])
    if d['text'][i] != d2['text'][i]:
        # border() is assumed to be defined elsewhere in the original source
        border(y, x, y + h, x + w)
        draw.text((d2['left'][i], d2['top'][i] - 20), text=d['text'][i],
                  font=font, fill="black")

plt.figure(figsize=(120, 120))
plt.imshow(imgF)

img = Image.open("desktop/img5.png").convert("L")
img = ImageOps.invert(img)
print(np.shape(img))
def main(): args = get_args() assert args.dataset, 'dataset must provide' if args.resume is None: raise ValueError('Must provide --resume when testing.') support_architectures = [ 'ksevendet', ] support_architectures += [f'efficientdet-d{i}' for i in range(8)] support_architectures += [ f'retinanet-res{i}' for i in [18, 34, 50, 101, 152] ] support_architectures.append('retinanet-p45p6') print(support_architectures) if args.architecture == 'ksevendet': ksevendet_cfg = args.model_cfg if ksevendet_cfg.get('variant'): network_name = f'{args.architecture}-{ksevendet_cfg["variant"]}-{ksevendet_cfg["neck"]}' else: assert 0, 'not support now.' assert isinstance(ksevendet_cfg, dict) network_name = f'{args.architecture}-{ksevendet_cfg["backbone"]}_specifical-{ksevendet_cfg["neck"]}' elif args.architecture in support_architectures: network_name = args.architecture else: raise ValueError('Architecture {} is not support.'.format( args.architecture)) args.network_name = network_name net_logger = get_logger(name='Network Logger', args=args) net_logger.info('Positive Threshold: {:.2f}'.format(args.threshold)) _shape_1, _shape_2 = tuple(map(int, args.input_shape.split(','))) _normalizer = Normalizer(inference_mode=True) if args.resize_mode == 0: _resizer = Resizer(min_side=_shape_1, max_side=_shape_2, resize_mode=args.resize_mode, logger=net_logger, inference_mode=True) elif args.resize_mode == 1: _resizer = Resizer(height=_shape_1, width=_shape_2, resize_mode=args.resize_mode, logger=net_logger, inference_mode=True) else: raise ValueError('Illegal resize mode.') transfrom_funcs_valid = [ _normalizer, _resizer, ] transform = transforms.Compose(transfrom_funcs_valid) net_logger.info('Number of Classes: {:>3}'.format(args.num_classes)) build_param = {'logger': net_logger} if args.architecture == 'ksevendet': net_model = ksevendet.KSevenDet(ksevendet_cfg, num_classes=args.num_classes, pretrained=False, **build_param) elif args.architecture == 'retinanet-p45p6': net_model = retinanet.retinanet_p45p6(num_classes=args.num_classes, **build_param) elif args.architecture.split('-')[0] == 'retinanet': net_model = retinanet.build_retinanet(args.architecture, num_classes=args.num_classes, pretrained=False, **build_param) elif args.architecture.split('-')[0] == 'efficientdet': net_model = efficientdet.build_efficientdet( args.architecture, num_classes=args.num_classes, pretrained=False, **build_param) else: assert 0, 'architecture error' net_logger.info('Loading Weights from Checkpoint : {}'.format(args.resume)) net_model.load_state_dict(torch.load(args.resume)) #model = torch.load(args.resume) use_gpu = True if use_gpu: if torch.cuda.is_available(): net_model = net_model.cuda() if torch.cuda.is_available(): net_model = torch.nn.DataParallel(net_model).cuda() else: net_model = torch.nn.DataParallel(net_model) demo_image_files = os.listdir(args.demo_path) demo_image_files.sort() #if len(demo_image_files) > CONVERT_FILE_LIMIT: # print('WARNING: Too many files... 
total {} files.'.format(len(demo_image_files))) fontsize = 12 score_font = ImageFont.truetype("DejaVuSans.ttf", size=fontsize) net_model.eval() img_array = [] # print(net_model) for f in demo_image_files: #for f in demo_image_files[:1]: # for f in demo_image_files[:100]: #for f in demo_image_files[:min(len(demo_image_files), CONVERT_FILE_LIMIT)]: print(f'inference {f}', end="\r") if f[-3:] not in ['png', 'jpg']: continue #img = skimage.io.imread(os.path.join(args.demo_path, f)) #if len(img.shape) == 2: # img = skimage.color.gray2rgb(img) #print(np.sum(img - a_pil_img)) img = Image.open(os.path.join(args.demo_path, f)).convert('RGB') a_img = np.array(img) # print(a_img) a_img = a_img.astype(np.float32) / 255.0 # print(a_img.shape) a_img = transform(a_img) # print(a_img.shape) a_img = torch.unsqueeze(a_img, 0) # print(a_img.shape) a_img = a_img.permute(0, 3, 1, 2) # print(a_img.shape) # print('predict...') scores, labels, boxes = net_model(a_img, return_loss=False) scores = scores.cpu() labels = labels.cpu() boxes = boxes.cpu() # change to (x, y, w, h) (MS COCO standard) boxes[:, 2] -= boxes[:, 0] boxes[:, 3] -= boxes[:, 1] #if args.dataset == 'thermal': # img = img.resize((80, 60)) draw = ImageDraw.Draw(img) for box_id in range(boxes.shape[0]): score = float(scores[box_id]) label = int(labels[box_id]) box = boxes[box_id, :] # scores are sorted, so we can break if score < args.threshold: break x, y, w, h = box color_ = COLOR_LABEL[label] _text_offset_x, _text_offset_y = 2, 3 #draw.rectangle(tuple([x, y, x+w, y+h]), width = 1, outline ='green') draw.rectangle(tuple([x, y, x + w, y + h]), width=1, outline=color_) draw.text(tuple( [int(x) + _text_offset_x + 1, int(y) + _text_offset_y + 1]), '{:.3f}'.format(score), fill='#000000', font=score_font) draw.text(tuple([int(x) + _text_offset_x, int(y) + _text_offset_y]), '{:.3f}'.format(score), fill=color_, font=score_font) # append detection to results # results.append(image_result) #plt.figure() #plt.imshow(img) #plt.axis('off') #plt.show() img_array.append(np.array(img)) height, width, layers = img_array[0].shape size = (width, height) fps = 30 #fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') out_video_file = os.path.join( args.output_path, '{}.avi'.format( os.path.basename(args.demo_path) if not args.output_name else args. output_name)) print('Convert to video... {}'.format(out_video_file)) out = cv2.VideoWriter(out_video_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, size) for i in range(len(img_array)): out.write(img_array[i]) out.release() print('Done')