def samples_MatrixNetAnchors(db, k_ind, data_aug, debug):
    """Assemble one training batch of per-layer anchor (box-centre) targets.

    For each of ``system_configs.batch_size`` images this reads the image and
    its detections from ``db``, applies the configured augmentation (cutout,
    random crop, horizontal flip, colour jitter, lighting) and then, for every
    output layer returned by ``layer_map_using_ranges`` for a box, writes a
    centre heatmap entry, two normalised centre-to-corner regression targets
    and the flattened centre index ("tag").

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with a 1-based category (the code subtracts 1).

    Args:
        db: Dataset object. Only ``configs``, ``db_inds``, ``shuffle_inds()``,
            ``image_file()``, ``detections()``, ``eig_val`` and ``eig_vec``
            are used here.
        k_ind: Position in ``db.db_inds`` of the next sample to read.
        data_aug: Unused; kept so the signature matches the other samplers.
        debug: When true, shuffling and all random augmentation are skipped
            and the image is not rescaled to ``[0, 1]``.

    Returns:
        ``(batch, k_ind)`` where ``batch`` is
        ``{"xs": [images, anchors_tags], "ys": [anchors_heatmaps,
        tl_corners_regrs, br_corners_regrs, tag_masks]}``. ``images`` is a
        one-element list holding the image tensor; every other entry is a
        list with one tensor per output layer. ``k_ind`` is the advanced
        dataset position, wrapped modulo the dataset size.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    base_layer_range = db.configs["base_layer_range"]
    cutout_flag = db.configs["cutout"]
    layers_range = db.configs["layers_range"]

    # Number of target slots per image and per output layer.
    max_tag_len = 256

    # Index the layer matrix: entry (i, j) that is not -1 becomes one output
    # layer whose height is halved j times and whose width is halved i times
    # relative to the stride-8 map.
    _dict = {}
    output_sizes = []
    for i, row in enumerate(layers_range):
        for j, entry in enumerate(row):
            if entry != -1:
                output_sizes.append([
                    input_size[0] // (8 * 2**j),
                    input_size[1] // (8 * 2**i),
                ])
                _dict[(i + 1) * 10 + (j + 1)] = entry
    layers_range = [_dict[key] for key in sorted(_dict)]
    # Only the diagonal layers present: passed on to layer_map_using_ranges.
    fpn_flag = set(_dict.keys()) == set([11, 22, 33, 44, 55])

    # The regression normalisation depends only on the config, so compute it
    # once instead of once per detection and layer.
    # NOTE(review): "/ 8 / 2" presumably maps the pixel range onto the
    # stride-8 base layer and halves it (centre-to-corner extent) - confirm.
    min_y, max_y, min_x, max_x = (bound / 8 / 2 for bound in base_layer_range)
    x_mid, x_span = (max_x + min_x) / 2, max_x - min_x
    y_mid, y_span = (max_y + min_y) / 2, max_y - min_y

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    anchors_heatmaps = [
        np.zeros((batch_size, categories, out_h, out_w), dtype=np.float32)
        for out_h, out_w in output_sizes
    ]
    tl_corners_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    br_corners_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    anchors_tags = [
        np.zeros((batch_size, max_tag_len), dtype=np.int64)
        for _ in output_sizes
    ]
    tag_masks = [
        np.zeros((batch_size, max_tag_len), dtype=bool)
        for _ in output_sizes
    ]
    tag_lens = [
        np.zeros((batch_size, ), dtype=np.int32) for _ in output_sizes
    ]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # k_ind == 0 marks the start of a pass over the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        if cutout_flag:
            image = cutout(image, detections)

        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # Mirror the x coordinates and swap them so x_tl stays <= x_br.
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): lighting is nested under rand_color, as in the
            # upstream CornerNet sampler - confirm against the original file.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            box_w = xbr - xtl
            box_h = ybr - ytl
            category = int(detection[-1]) - 1

            for olayer_idx in layer_map_using_ranges(box_w, box_h,
                                                     layers_range, fpn_flag):
                out_h, out_w = output_sizes[olayer_idx]
                width_ratio = out_w / input_size[1]
                height_ratio = out_h / input_size[0]

                fxtl = xtl * width_ratio
                fytl = ytl * height_ratio
                fxbr = xbr * width_ratio
                fybr = ybr * height_ratio

                # Integer box centre on this layer, clamped to the last cell.
                xc = int(min(round((fxtl + fxbr) / 2), out_w - 1))
                yc = int(min(round((fytl + fybr) / 2), out_h - 1))

                if gaussian_bump:
                    if gaussian_rad == -1:
                        # -1 requests a radius derived from the box size.
                        width = math.ceil(box_w * width_ratio)
                        height = math.ceil(box_h * height_ratio)
                        radius = gaussian_radius((height, width),
                                                 gaussian_iou)
                        radius = max(0, int(radius))
                    else:
                        radius = gaussian_rad
                    draw_gaussian(
                        anchors_heatmaps[olayer_idx][b_ind, category],
                        [xc, yc], radius)
                else:
                    anchors_heatmaps[olayer_idx][b_ind, category, yc, xc] = 1

                tag_ind = tag_lens[olayer_idx][b_ind]
                if tag_ind >= max_tag_len:
                    # All regression slots of this layer are taken: keep the
                    # heatmap entry but skip the tag instead of raising
                    # IndexError on a crowded image.
                    continue

                tl_corners_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    ((xc - fxtl) - x_mid) / x_span,
                    ((yc - fytl) - y_mid) / y_span,
                ]
                br_corners_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    ((fxbr - xc) - x_mid) / x_span,
                    ((fybr - yc) - y_mid) / y_span,
                ]
                # Row-major index of the centre cell on this layer.
                anchors_tags[olayer_idx][b_ind, tag_ind] = yc * out_w + xc
                tag_lens[olayer_idx][b_ind] += 1

    # Mark the filled slots of every layer / image.
    for layer_mask, layer_lens in zip(tag_masks, tag_lens):
        for b_ind in range(batch_size):
            layer_mask[b_ind, :layer_lens[b_ind]] = 1

    images = [torch.from_numpy(images)]
    anchors_heatmaps = [
        torch.from_numpy(anchors) for anchors in anchors_heatmaps
    ]
    tl_corners_regrs = [torch.from_numpy(c) for c in tl_corners_regrs]
    br_corners_regrs = [torch.from_numpy(c) for c in br_corners_regrs]
    anchors_tags = [torch.from_numpy(t) for t in anchors_tags]
    tag_masks = [torch.from_numpy(tags) for tags in tag_masks]

    return {
        "xs": [images, anchors_tags],
        "ys": [anchors_heatmaps, tl_corners_regrs, br_corners_regrs,
               tag_masks]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Build one training batch of top-left / bottom-right / centre targets.

    For each of ``system_configs.batch_size`` images this reads the image and
    its detections from ``db``, applies the configured augmentation (random
    crop, horizontal flip, colour jitter, lighting, normalisation) and writes
    three keypoint heatmaps, the sub-cell regression offsets and the
    flattened keypoint indices ("tags") on the single output map
    ``db.configs["output_sizes"][0]``.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with a 1-based category (the code subtracts 1).

    Args:
        db: Dataset object. Only ``configs``, ``db_inds``, ``shuffle_inds()``,
            ``image_file()``, ``detections()``, ``eig_val``, ``eig_vec``,
            ``mean`` and ``std`` are used here.
        k_ind: Position in ``db.db_inds`` of the next sample to read.
        data_aug: Unused; kept for signature compatibility.
        debug: When true, shuffling, random augmentation, rescaling and
            normalisation are all skipped.

    Returns:
        ``(batch, k_ind)`` where ``batch`` is
        ``{"xs": [images, tl_tags, br_tags, ct_tags], "ys": [tl_heatmaps,
        br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]}``
        (all torch tensors) and ``k_ind`` is the advanced dataset position,
        wrapped modulo the dataset size.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    # Number of target slots per image.
    max_tag_len = 500

    # Scale factors from input pixels to output-map cells.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    br_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    ct_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # k_ind == 0 marks the start of a pass over the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # Mirror the x coordinates and swap them so x_tl stays <= x_br.
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): lighting is nested under rand_color, as in the
            # upstream CornerNet sampler - confirm against the original file.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct = (detection[2] + detection[0]) / 2.
            yct = (detection[3] + detection[1]) / 2.

            # Fractional keypoint positions on the output map ...
            fxtl = xtl * width_ratio
            fytl = ytl * height_ratio
            fxbr = xbr * width_ratio
            fybr = ybr * height_ratio
            fxct = xct * width_ratio
            fyct = yct * height_ratio

            # ... and the cells they fall into.
            xtl, ytl = int(fxtl), int(fytl)
            xbr, ybr = int(fxbr), int(fybr)
            xct, yct = int(fxct), int(fyct)

            if gaussian_bump:
                if gaussian_rad == -1:
                    # -1 requests a radius derived from the box size.
                    width = math.ceil(
                        (detection[2] - detection[0]) * width_ratio)
                    height = math.ceil(
                        (detection[3] - detection[1]) * height_ratio)
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl],
                              radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr],
                              radius)
                # "delte" is the keyword name expected by draw_gaussian.
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius, delte=5)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
            if tag_ind >= max_tag_len:
                # All regression slots are taken: keep the heatmap entries
                # but skip the tag instead of raising IndexError.
                continue

            # Offsets lost by truncating to integer cells.
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
            # Row-major cell indices on the output map.
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    # Mark the filled slots of every image.
    for b_ind in range(batch_size):
        tag_masks[b_ind, :tag_lens[b_ind]] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    ct_regrs = torch.from_numpy(ct_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs,
            br_regrs, ct_regrs
        ]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Build one training batch of top-left / bottom-right corner targets.

    For each of ``system_configs.batch_size`` images this picks the next
    database entry whose image file exists and can be decoded, applies the
    configured augmentation (random crop, colour jitter, lighting) and writes
    two corner heatmaps, the sub-cell regression offsets and the flattened
    corner indices ("tags") on ``db.configs["output_sizes"][0]``. This
    variant performs no horizontal flip and no mean/std normalisation.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with a 1-based category (the code subtracts 1).

    Args:
        db: Dataset object. Only ``configs``, ``db_inds``, ``shuffle_inds()``,
            ``image_file()``, ``detections()``, ``eig_val`` and ``eig_vec``
            are used here.
        k_ind: Position in ``db.db_inds`` of the next sample to read.
        data_aug: Unused; kept for signature compatibility.
        debug: When true, shuffling, random augmentation and rescaling to
            ``[0, 1]`` are skipped.

    Returns:
        ``(batch, k_ind)`` where ``batch`` is
        ``{"xs": [images, tl_tags, br_tags], "ys": [tl_heatmaps,
        br_heatmaps, tag_masks, tl_regrs, br_regrs]}`` (all torch tensors)
        and ``k_ind`` is the advanced dataset position, wrapped modulo the
        dataset size.

    Raises:
        RuntimeError: If a full pass over the database finds no image file
            that exists and can be decoded.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    # Number of target slots per image.
    max_tag_len = 128

    # Scale factors from input pixels to output-map cells.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    br_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # k_ind == 0 marks the start of a pass over the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        # Advance to the next entry with a usable image. The search is
        # bounded by one full pass so that a missing data directory fails
        # loudly instead of spinning forever, and files that exist but
        # cannot be decoded (cv2.imread returns None) are skipped too.
        image = None
        for _ in range(db_size):
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size

            # reading image
            image_file = db.image_file(db_ind)
            if os.path.exists(image_file):
                image = cv2.imread(image_file)
                if image is not None:
                    break
        if image is None:
            raise RuntimeError(
                "kp_detection: no readable image file found in the database")

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): lighting is nested under rand_color, as in the
            # upstream CornerNet sampler - confirm against the original file.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]

            # Fractional corner positions on the output map ...
            fxtl = xtl * width_ratio
            fytl = ytl * height_ratio
            fxbr = xbr * width_ratio
            fybr = ybr * height_ratio

            # ... and the cells they fall into.
            xtl, ytl = int(fxtl), int(fytl)
            xbr, ybr = int(fxbr), int(fybr)

            if gaussian_bump:
                if gaussian_rad == -1:
                    # -1 requests a radius derived from the box size.
                    width = math.ceil(
                        (detection[2] - detection[0]) * width_ratio)
                    height = math.ceil(
                        (detection[3] - detection[1]) * height_ratio)
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl],
                              radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr],
                              radius)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1

            tag_ind = tag_lens[b_ind]
            # Offsets lost by truncating to integer cells.
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            # Row-major cell indices on the output map.
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            tag_lens[b_ind] += 1

            # Stop before the slot arrays overflow; remaining detections of
            # this image are dropped.
            if tag_lens[b_ind] >= max_tag_len - 1:
                print("Too many targets, skip!")
                print(tag_lens[b_ind])
                print(image_file)
                break

    # Mark the filled slots of every image.
    for b_ind in range(batch_size):
        tag_masks[b_ind, :tag_lens[b_ind]] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags],
        "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs]
    }, k_ind
def samples_MatrixNetCorners(db, k_ind, data_aug, debug):
    """Assemble one training batch of per-layer corner targets.

    For each of ``system_configs.batch_size`` images this reads the image and
    its detections from ``db``, applies the configured augmentation (cutout,
    random crop, horizontal flip, colour jitter, lighting) and then, for every
    output layer returned by ``layer_map_using_ranges`` for a box, writes
    top-left / bottom-right corner heatmaps, sub-cell corner offsets, a
    half-size "centre" regression target and the flattened corner indices
    ("tags").

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with a 1-based category (the code subtracts 1).

    Args:
        db: Dataset object. Only ``configs``, ``db_inds``, ``shuffle_inds()``,
            ``image_file()``, ``detections()``, ``eig_val`` and ``eig_vec``
            are used here.
        k_ind: Position in ``db.db_inds`` of the next sample to read.
        data_aug: Unused; kept so the signature matches the other samplers.
        debug: When true, shuffling and all random augmentation are skipped
            and the image is not rescaled to ``[0, 1]``.

    Returns:
        ``(batch, k_ind)`` where ``batch`` is
        ``{"xs": [images, tl_tags, br_tags], "ys": [tl_heatmaps,
        br_heatmaps, tag_masks, tl_regrs, br_regrs, center_regrs]}``.
        ``images`` is a one-element list holding the image tensor; every
        other entry is a list with one tensor per output layer. ``k_ind`` is
        the advanced dataset position, wrapped modulo the dataset size.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    cutout_flag = db.configs["cutout"]
    layers_range = db.configs["layers_range"]

    # Number of target slots per image and per output layer.
    max_tag_len = 128

    # Index the layer matrix: entry (i, j) that is not -1 becomes one output
    # layer whose height is halved j times and whose width is halved i times
    # relative to the stride-8 map.
    _dict = {}
    output_sizes = []
    for i, row in enumerate(layers_range):
        for j, entry in enumerate(row):
            if entry != -1:
                output_sizes.append([
                    input_size[0] // (8 * 2**j),
                    input_size[1] // (8 * 2**i),
                ])
                _dict[(i + 1) * 10 + (j + 1)] = entry
    layers_range = [_dict[key] for key in sorted(_dict)]
    # Only the diagonal layers present: passed on to layer_map_using_ranges.
    fpn_flag = set(_dict.keys()) == set([11, 22, 33, 44, 55])

    def _per_layer(shape_tail, dtype):
        # One zero array of shape (batch_size, *shape_tail) per output layer.
        return [
            np.zeros((batch_size, ) + shape_tail, dtype=dtype)
            for _ in output_sizes
        ]

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = [
        np.zeros((batch_size, categories, out_h, out_w), dtype=np.float32)
        for out_h, out_w in output_sizes
    ]
    br_heatmaps = [
        np.zeros((batch_size, categories, out_h, out_w), dtype=np.float32)
        for out_h, out_w in output_sizes
    ]
    tl_regrs = _per_layer((max_tag_len, 2), np.float32)
    center_regrs = _per_layer((max_tag_len, 2), np.float32)
    br_regrs = _per_layer((max_tag_len, 2), np.float32)
    tl_tags = _per_layer((max_tag_len, ), np.int64)
    br_tags = _per_layer((max_tag_len, ), np.int64)
    tag_masks = _per_layer((max_tag_len, ), bool)
    tag_lens = _per_layer((), np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # k_ind == 0 marks the start of a pass over the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        if cutout_flag:
            image = cutout(image, detections)

        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # Mirror the x coordinates and swap them so x_tl stays <= x_br.
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): lighting is nested under rand_color, as in the
            # upstream CornerNet sampler - confirm against the original file.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            box_w = detection[2] - detection[0]
            box_h = detection[3] - detection[1]
            category = int(detection[-1]) - 1

            for olayer_idx in layer_map_using_ranges(box_w, box_h,
                                                     layers_range, fpn_flag):
                out_h, out_w = output_sizes[olayer_idx]
                width_ratio = out_w / input_size[1]
                height_ratio = out_h / input_size[0]

                # Fractional corner positions on this layer ...
                fxtl = detection[0] * width_ratio
                fytl = detection[1] * height_ratio
                fxbr = detection[2] * width_ratio
                fybr = detection[3] * height_ratio

                # ... rounded to the nearest cell and clamped to the map.
                mx = out_w - 1
                my = out_h - 1
                xtl = int(min(round(fxtl), mx))
                ytl = int(min(round(fytl), my))
                xbr = int(min(round(fxbr), mx))
                ybr = int(min(round(fybr), my))

                if gaussian_bump:
                    if gaussian_rad == -1:
                        # -1 requests a radius derived from the box size.
                        width = math.ceil(box_w * width_ratio)
                        height = math.ceil(box_h * height_ratio)
                        radius = gaussian_radius((height, width),
                                                 gaussian_iou)
                        radius = max(0, int(radius))
                    else:
                        radius = gaussian_rad
                    draw_gaussian(tl_heatmaps[olayer_idx][b_ind, category],
                                  [xtl, ytl], radius)
                    draw_gaussian(br_heatmaps[olayer_idx][b_ind, category],
                                  [xbr, ybr], radius)
                else:
                    tl_heatmaps[olayer_idx][b_ind, category, ytl, xtl] = 1
                    br_heatmaps[olayer_idx][b_ind, category, ybr, xbr] = 1

                tag_ind = tag_lens[olayer_idx][b_ind]
                if tag_ind >= max_tag_len:
                    # All regression slots of this layer are taken: keep the
                    # heatmap entries but skip the tag instead of raising
                    # IndexError on a crowded image.
                    continue

                tl_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    fxtl - xtl, fytl - ytl
                ]
                br_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    fxbr - xbr, fybr - ybr
                ]
                # Half box size on this layer, divided by the size of the
                # last entry of output_sizes.
                center_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    (fxbr - fxtl) / 2.0 / output_sizes[-1][1],
                    (fybr - fytl) / 2.0 / output_sizes[-1][0]
                ]
                # Row-major cell indices on this layer.
                tl_tags[olayer_idx][b_ind, tag_ind] = ytl * out_w + xtl
                br_tags[olayer_idx][b_ind, tag_ind] = ybr * out_w + xbr
                tag_lens[olayer_idx][b_ind] += 1

    # Mark the filled slots of every layer / image.
    for layer_mask, layer_lens in zip(tag_masks, tag_lens):
        for b_ind in range(batch_size):
            layer_mask[b_ind, :layer_lens[b_ind]] = 1

    images = [torch.from_numpy(images)]
    tl_heatmaps = [torch.from_numpy(tl) for tl in tl_heatmaps]
    br_heatmaps = [torch.from_numpy(br) for br in br_heatmaps]
    tl_regrs = [torch.from_numpy(tl) for tl in tl_regrs]
    br_regrs = [torch.from_numpy(br) for br in br_regrs]
    center_regrs = [torch.from_numpy(c) for c in center_regrs]
    tl_tags = [torch.from_numpy(tl) for tl in tl_tags]
    br_tags = [torch.from_numpy(br) for br in br_tags]
    tag_masks = [torch.from_numpy(tags) for tags in tag_masks]

    return {
        "xs": [images, tl_tags, br_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs,
            center_regrs
        ]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Load one batch and build its keypoint heatmaps, offsets and tags.

    For each of ``system_configs.batch_size`` images this reads the image and
    its annotations from ``db``, applies the configured augmentation (random
    crop, horizontal flip, colour jitter, lighting, normalisation) and writes
    top-left, bottom-right and centre heatmaps, the sub-cell regression
    offsets and the flattened keypoint indices ("tags") on the single output
    map ``db.configs["output_sizes"][0]``.

    Author's notes carried over from the original comments (not verifiable
    from this function alone):
      * call chain: train.py -> train() -> init_parallel_jobs ->
        prefetch_data -> sample_data -> kp_detection;
      * ``db`` is an MSCOCO instance (trainval2014 for training, minival2014
        for validation);
      * the CenterNet-104 configuration uses batch_size 48, categories 80,
        input_size [511, 511], output_sizes [[128, 128]], border 128,
        gaussian_iou 0.7 and gaussian_radius -1;
      * ``rand_scales`` is built in detection.py as
        ``np.arange(rand_scale_min, rand_scale_max, rand_scale_step)``;
        ``np.arange`` nominally excludes the stop value, so with
        0.6 / 1.4 / 0.1 the largest scale is about 1.3, not 1.4.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with a 1-based category (the code subtracts 1). Coordinates are expected
    in input-image pixels, since they are multiplied by the output/input
    ratio below.

    Args:
        db: Dataset object. Only ``configs``, ``db_inds``, ``shuffle_inds()``,
            ``image_file()``, ``detections()``, ``eig_val``, ``eig_vec``,
            ``mean`` and ``std`` are used here.
        k_ind: Position in ``db.db_inds`` of the next sample to read. It is
            advanced as ``(k_ind + 1) % db_size`` for every image.
        data_aug: Unused; kept for signature compatibility.
        debug: When true, shuffling, random augmentation, rescaling and
            normalisation are all skipped.

    Returns:
        ``(batch, k_ind)`` where ``batch`` is
        ``{"xs": [images, tl_tags, br_tags, ct_tags], "ys": [tl_heatmaps,
        br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]}``
        (all torch tensors) and ``k_ind`` is the advanced dataset position.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    # Images carry different numbers of detections, but the batch needs a
    # fixed shape: every image gets max_tag_len target slots and tag_masks
    # records which of them are filled.
    max_tag_len = 128

    # Scale factors from input pixels to output-map cells, i.e. output /
    # input (128 / 511, roughly 0.25, for the sizes quoted above).
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    br_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    ct_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    # Number of filled slots per image.
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    # Number of entries in the dataset index.
    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # k_ind is 0 on the first call and again every time it wraps around
        # the dataset, so the index is reshuffled once per pass.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections (the annotations of this one image)
        detections = db.detections(db_ind)

        # cropping an image randomly; the boxes are updated with the image
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        # Resize image and boxes together, then clip the boxes, which may
        # now reach outside the image, back inside its bounds.
        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # Mirror the x coordinates and swap them so x_tl stays <= x_br.
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): lighting is nested under rand_color, as in the
            # upstream CornerNet sampler - confirm against the original file.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        # HWC -> CHW (channel first)
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct = (detection[2] + detection[0]) / 2.
            yct = (detection[3] + detection[1]) / 2.

            # Fractional keypoint positions on the output map ...
            fxtl = xtl * width_ratio
            fytl = ytl * height_ratio
            fxbr = xbr * width_ratio
            fybr = ybr * height_ratio
            fxct = xct * width_ratio
            fyct = yct * height_ratio

            # ... and the cells they fall into.
            xtl, ytl = int(fxtl), int(fytl)
            xbr, ybr = int(fxbr), int(fybr)
            xct, yct = int(fxct), int(fyct)

            if gaussian_bump:
                if gaussian_rad == -1:
                    # -1 requests a radius derived from the box size on the
                    # output map.
                    width = math.ceil(
                        (detection[2] - detection[0]) * width_ratio)
                    height = math.ceil(
                        (detection[3] - detection[1]) * height_ratio)
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                # One Gaussian bump per keypoint on that category's map.
                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl],
                              radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr],
                              radius)
                # NOTE(review): per the original comments draw_gaussian uses
                # sigma = diameter / delte with a default delte of 6, so
                # delte=5 would give the centre a slightly wider bump than
                # the corners - confirm in draw_gaussian.
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius, delte=5)
            else:
                # Without bumps only the keypoint cell itself is set.
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
            if tag_ind >= max_tag_len:
                # All regression slots are taken: keep the heatmap entries
                # but skip the tag instead of raising IndexError.
                continue

            # The regrs hold the fractional part lost by truncation; the
            # tags hold the integer cell as a row-major index.
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    # Mark the filled slots of every image.
    for b_ind in range(batch_size):
        tag_masks[b_ind, :tag_lens[b_ind]] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    ct_regrs = torch.from_numpy(ct_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs,
               br_regrs, ct_regrs]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one training batch of extreme-point and center targets.

    For each slot of the batch the next index is taken from ``db.db_inds``
    (the indices are reshuffled when ``k_ind`` is 0 and ``debug`` is off),
    the image is randomly cropped, resized, clipped, randomly flipped and
    normalized, and per-detection targets are written for the top, left,
    bottom and right extreme points and for the center point.

    Args:
        db: Dataset object. Reads ``db.configs``, ``db.db_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val`` and ``db.eig_vec``;
            calls ``db.shuffle_inds``, ``db.image_file`` and
            ``db.detections`` (which must return ``(detections,
            extreme_pts)``).
        k_ind: Position in ``db.db_inds`` of the first sample to read.
        data_aug: Not used by this function.
        debug: When true, index shuffling and colour augmentation are
            skipped and the heatmaps of the first image are displayed
            through ``Debugger``.

    Returns:
        A tuple ``(batch, k_ind)``. ``batch["xs"]`` is
        ``[images, t_tags, l_tags, b_tags, r_tags, ct_tags]`` and
        ``batch["ys"]`` is ``[t_heatmaps, l_heatmaps, b_heatmaps,
        r_heatmaps, ct_heatmaps, tag_masks, t_regrs, l_regrs, b_regrs,
        r_regrs]``, all as torch tensors. ``k_ind`` is the index to resume
        from on the next call.

    Raises:
        NotImplementedError: If ``rand_crop`` is disabled in the config;
            only the random-crop path is implemented.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    # Upper bound on the number of tagged detections stored per image.
    max_tag_len = 128

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    regr_shape = (batch_size, max_tag_len, 2)
    tag_shape = (batch_size, max_tag_len)

    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    t_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    l_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    b_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    r_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    ct_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    t_regrs = np.zeros(regr_shape, dtype=np.float32)
    l_regrs = np.zeros(regr_shape, dtype=np.float32)
    b_regrs = np.zeros(regr_shape, dtype=np.float32)
    r_regrs = np.zeros(regr_shape, dtype=np.float32)
    t_tags = np.zeros(tag_shape, dtype=np.int64)
    l_tags = np.zeros(tag_shape, dtype=np.int64)
    b_tags = np.zeros(tag_shape, dtype=np.int64)
    r_tags = np.zeros(tag_shape, dtype=np.int64)
    ct_tags = np.zeros(tag_shape, dtype=np.int64)
    tag_masks = np.zeros(tag_shape, dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    # Scale factors from input-image pixels to heatmap cells.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections, extreme_pts = db.detections(db_ind)

        # cropping an image randomly
        if not rand_crop:
            # An explicit raise (instead of ``assert 0``) still fires when
            # Python runs with -O.
            raise NotImplementedError(
                "kp_detection only supports rand_crop=True")
        image, detections, extreme_pts = random_crop_pts(
            image, detections, extreme_pts, rand_scales, input_size,
            border=border)

        image, detections, extreme_pts = _resize_image_pts(
            image, detections, extreme_pts, input_size)
        detections, extreme_pts = _clip_detections_pts(
            image, detections, extreme_pts)

        # flipping an image randomly
        if np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
            extreme_pts[:, :, 0] = width - extreme_pts[:, :, 0] - 1
            # After mirroring, the left (index 1) and right (index 3)
            # extreme points trade places.
            extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \
                extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy()

        image = image.astype(np.float32) / 255.
        if not debug:
            # NOTE(review): lighting is applied only when rand_color is
            # enabled -- confirm this nesting is intended.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            category = int(detection[-1]) - 1
            extreme_pt = extreme_pts[ind]

            xt, yt = extreme_pt[0, 0], extreme_pt[0, 1]
            xl, yl = extreme_pt[1, 0], extreme_pt[1, 1]
            xb, yb = extreme_pt[2, 0], extreme_pt[2, 1]
            xr, yr = extreme_pt[3, 0], extreme_pt[3, 1]
            xct = (xl + xr) / 2
            yct = (yt + yb) / 2

            # Fractional heatmap coordinates ...
            fxt, fyt = xt * width_ratio, yt * height_ratio
            fxl, fyl = xl * width_ratio, yl * height_ratio
            fxb, fyb = xb * width_ratio, yb * height_ratio
            fxr, fyr = xr * width_ratio, yr * height_ratio
            fxct, fyct = xct * width_ratio, yct * height_ratio

            # ... and the integer cells they fall into.
            xt, yt = int(fxt), int(fyt)
            xl, yl = int(fxl), int(fyl)
            xb, yb = int(fxb), int(fyb)
            xr, yr = int(fxr), int(fyr)
            xct, yct = int(fxct), int(fyct)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
                draw_gaussian(t_heatmaps[b_ind, category], [xt, yt], radius)
                draw_gaussian(l_heatmaps[b_ind, category], [xl, yl], radius)
                draw_gaussian(b_heatmaps[b_ind, category], [xb, yb], radius)
                draw_gaussian(r_heatmaps[b_ind, category], [xr, yr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius)
            else:
                t_heatmaps[b_ind, category, yt, xt] = 1
                l_heatmaps[b_ind, category, yl, xl] = 1
                b_heatmaps[b_ind, category, yb, xb] = 1
                r_heatmaps[b_ind, category, yr, xr] = 1
                # The center peak was previously left unset in this branch
                # even though ct_tags is recorded below.
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
            if tag_ind >= max_tag_len:
                # No tag slot left: the heatmap peaks above are kept, but
                # the offsets/tags are dropped instead of indexing past the
                # end of the pre-allocated arrays.
                continue

            # Sub-cell offsets lost by the integer truncation above.
            t_regrs[b_ind, tag_ind, :] = [fxt - xt, fyt - yt]
            l_regrs[b_ind, tag_ind, :] = [fxl - xl, fyl - yl]
            b_regrs[b_ind, tag_ind, :] = [fxb - xb, fyb - yb]
            r_regrs[b_ind, tag_ind, :] = [fxr - xr, fyr - yr]
            # Flattened (row-major) heatmap index of every key point.
            t_tags[b_ind, tag_ind] = yt * output_size[1] + xt
            l_tags[b_ind, tag_ind] = yl * output_size[1] + xl
            b_tags[b_ind, tag_ind] = yb * output_size[1] + xb
            r_tags[b_ind, tag_ind] = yr * output_size[1] + xr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    # Mark which tag slots of every image hold a real detection.
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    if debug:
        debugger = Debugger(num_classes=80)
        t_hm = debugger.gen_colormap(t_heatmaps[0])
        l_hm = debugger.gen_colormap(l_heatmaps[0])
        b_hm = debugger.gen_colormap(b_heatmaps[0])
        r_hm = debugger.gen_colormap(r_heatmaps[0])
        ct_hm = debugger.gen_colormap(ct_heatmaps[0])
        # Undo the normalization so the first image can be displayed.
        img = images[0] * db.std.reshape(3, 1, 1) + db.mean.reshape(3, 1, 1)
        img = (img * 255).astype(np.uint8).transpose(1, 2, 0)
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(
            img,
            np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm)),
            'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=True)

    images = torch.from_numpy(images)
    t_heatmaps = torch.from_numpy(t_heatmaps)
    l_heatmaps = torch.from_numpy(l_heatmaps)
    b_heatmaps = torch.from_numpy(b_heatmaps)
    r_heatmaps = torch.from_numpy(r_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    t_regrs = torch.from_numpy(t_regrs)
    l_regrs = torch.from_numpy(l_regrs)
    b_regrs = torch.from_numpy(b_regrs)
    r_regrs = torch.from_numpy(r_regrs)
    t_tags = torch.from_numpy(t_tags)
    l_tags = torch.from_numpy(l_tags)
    b_tags = torch.from_numpy(b_tags)
    r_tags = torch.from_numpy(r_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags],
        "ys": [
            t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps, ct_heatmaps,
            tag_masks, t_regrs, l_regrs, b_regrs, r_regrs
        ]
    }, k_ind
def kp_detection(db, k_ind):
    """Assemble one training batch of images, padding masks and lane labels.

    For each slot of the batch the next index is taken from ``db.db_inds``
    (the indices are reshuffled whenever ``k_ind`` is 0), the image and its
    lane annotation are passed through ``db.transform``, the label array is
    rebuilt from the transformed lanes, and the image is colour-augmented
    and normalized.

    Args:
        db: Dataset object. Reads ``db.configs``, ``db.db_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val`` and ``db.eig_vec``;
            calls ``db.shuffle_inds``, ``db.detections``,
            ``db.lane_to_linestrings``, ``db.transform``,
            ``db.linestrings_to_lanes`` and ``db._transform_annotation``.
        k_ind: Position in ``db.db_inds`` of the first sample to read.

    Returns:
        A tuple ``(batch, k_ind)``. ``batch["xs"]`` is ``[images, masks]``
        and ``batch["ys"]`` is ``[images, *gt_lanes]`` where ``gt_lanes``
        holds one float32 tensor per image. ``k_ind`` is the index to
        resume from on the next call.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    input_size = db.configs["input_size"]
    lighting = db.configs["lighting"]
    rand_color = db.configs["rand_color"]

    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)  # b, 3, H, W
    masks = np.zeros((batch_size, 1, input_size[0], input_size[1]),
                     dtype=np.float32)  # b, 1, H, W
    gt_lanes = []

    db_size = db.db_inds.size

    for b_ind in range(batch_size):
        if k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading ground truth (all in the raw image coordinates)
        item = db.detections(db_ind)
        img = cv2.imread(item['path'])
        # Plain ``bool`` instead of the ``np.bool`` alias, which was removed
        # from NumPy in 1.24.
        mask = np.ones((1, img.shape[0], img.shape[1], 1), dtype=bool)

        # Apply the dataset transform to image, lanes and mask together,
        # then rebuild the label array from the transformed lanes.
        line_strings = db.lane_to_linestrings(item['old_anno']['lanes'])
        line_strings = LineStringsOnImage(line_strings, shape=img.shape)
        img, line_strings, mask = db.transform(
            image=img, line_strings=line_strings, segmentation_maps=mask)
        line_strings.clip_out_of_image_()
        new_anno = {
            'path': item['path'],
            'lanes': db.linestrings_to_lanes(line_strings)
        }
        new_anno['categories'] = item['categories']
        label = db._transform_annotation(
            new_anno, img_wh=(input_size[1], input_size[0]))['label']

        # clip polys: keep only rows whose first column is positive
        tgt_ids = label[:, 0]
        label = label[tgt_ids > 0]

        # make lower the same: negative entries of column 1 become 1, then
        # every row shares the column minimum.
        # NOTE(review): presumably column 1 is the lane's lower bound --
        # confirm against db._transform_annotation.
        label[:, 1][label[:, 1] < 0] = 1
        if label.shape[0] > 0:
            # np.min raises ValueError on an empty array, which happens when
            # no lane survives the transform.
            label[:, 1][...] = np.min(label[:, 1])

        # The per-image label is replicated batch_size times along a new
        # leading axis.
        label = np.stack([label] * batch_size, axis=0)
        gt_lanes.append(torch.from_numpy(label.astype(np.float32)))

        img = (img / 255.).astype(np.float32)
        # NOTE(review): lighting is applied only when rand_color is
        # enabled -- confirm this nesting is intended.
        if rand_color:
            color_jittering_(data_rng, img)
            if lighting:
                lighting_(data_rng, img, 0.1, db.eig_val, db.eig_vec)
        normalize_(img, db.mean, db.std)
        images[b_ind] = img.transpose((2, 0, 1))
        # The stored mask is the logical inverse of the transformed mask.
        masks[b_ind] = np.logical_not(mask[:, :, :, 0])

    images = torch.from_numpy(images)
    masks = torch.from_numpy(masks)

    return {
        "xs": [images, masks],
        "ys": [images, *gt_lanes]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one training batch of key-point heatmaps and grouped tags.

    For each slot of the batch an image and its detections are read from
    ``db``, cropped/resized/clipped, optionally colour-augmented, and every
    detection (a flat sequence of alternating x, y coordinates) is written
    into ``key_heatmaps``, the flat ``key_tags``/``key_regrs`` arrays and
    the per-group ``key_tags_grouped``/``tag_masks_grouped`` arrays.

    Args:
        db: Dataset object. Reads ``db.configs``, ``db.db_inds``,
            ``db.eig_val`` and ``db.eig_vec``; calls ``db.shuffle_inds``,
            ``db.image_file`` and ``db.detections``.
        k_ind: Position in ``db.db_inds`` of the first sample to read.
        data_aug: Not used by this function.
        debug: When true, shuffling, random cropping and colour
            augmentation are skipped.

    Returns:
        A tuple ``(batch, k_ind)``. ``batch["xs"]`` is ``[images, key_tags,
        key_tags_grouped, tag_group_lens]`` and ``batch["ys"]`` is
        ``[key_heatmaps, hybrid_heatmaps, tag_masks, tag_masks_grouped,
        key_regrs]``, all as torch tensors.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    # Capacity limits: total key points per image, key points per group,
    # and groups (detections) per image.
    max_tag_len = 256
    max_tag_len_group = 128
    max_group_len = 16
    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    key_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    hybrid_heatmaps = np.zeros(
        (batch_size, categories, output_size[0], output_size[1]),
        dtype=np.float32)
    key_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    key_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    key_tags_grouped = np.zeros((batch_size, max_group_len, max_tag_len_group),
                                dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_masks_grouped = np.zeros(
        (batch_size, max_group_len, max_tag_len_group), dtype=np.uint8)
    # Never written after allocation, so it stays all zeros.
    hybrid_masks_grouped = np.zeros(
        (batch_size, max_group_len, max_tag_len_group), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)
    tag_group_lens = np.zeros((batch_size, ), dtype=np.int32)
    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()
        # Keep drawing samples until one has both an image and detections.
        flag = False
        while not flag:
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size
            # reading image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            # NOTE(review): ``image.any()`` yields a bool, so ``!= None`` is
            # always true for an array; if ``image`` itself is None this
            # line raises AttributeError. Presumably ``image is not None``
            # was intended -- confirm.
            if image.any() != None:
                flag = True
            temp = db.detections(db_ind)
            if temp == None:
                flag = False
        ori_size = image.shape
        # This rebinds the local ``categories`` (read from the config above)
        # to the per-detection categories returned by the dataset.
        (detections, categories) = temp
        # Keep at most max_group_len groups.
        detections = detections[0:max_group_len]
        categories = categories[0:max_group_len]
        # cropping an image randomly
        if not debug and rand_crop:
            image, detections, scale = random_crop_line(image,
                                                        detections,
                                                        rand_scales,
                                                        input_size,
                                                        border=border)
        else:
            # NOTE(review): ``scale`` is not assigned on this path but is
            # read below when gaussian_bump is set -- confirm.
            image, detections = _full_image_crop(image, detections)
        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)
        # Scale factors from input-image pixels to heatmap cells.
        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]
        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): nesting of ``lighting`` under ``rand_color`` is
            # assumed -- confirm.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            # normalize_(image, db.mean, db.std)  (normalization disabled)
        images[b_ind] = image.transpose((2, 0, 1))
        for ind, (detection, category) in enumerate(zip(detections,
                                                        categories)):
            # The per-detection category is discarded; everything is
            # written into heatmap channel 0.
            category = 0
            # Even positions hold x, odd positions hold y; scale both into
            # heatmap coordinates, then truncate to integer cells.
            fdetection = detection.copy()
            fdetection[0:len(fdetection
                             ):2] = detection[0:len(detection):2] * width_ratio
            fdetection[1:len(fdetection):
                       2] = detection[1:len(detection):2] * height_ratio
            detection = fdetection.astype(np.int32)
            if gaussian_bump:
                width = ori_size[1] / 50 / 4 / scale
                height = ori_size[0] / 50 / 4 / scale
                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
                for k in range(int(len(detection) / 2)):
                    # Skip points with a zero coordinate or lying on/after
                    # the last heatmap row/column.
                    if not (detection[2 * k] == 0 or detection[2 * k + 1] == 0
                            or detection[2 * k] >= (output_size[1] - 1e-2)
                            or detection[2 * k + 1] >=
                            (output_size[0] - 1e-2)):
                        if key_heatmaps[b_ind, category, detection[2 * k + 1],
                                        detection[2 * k]] < 0.85:
                            draw_gaussian(
                                key_heatmaps[b_ind, category],
                                [detection[2 * k], detection[2 * k + 1]],
                                radius)
                        else:
                            # The cell is already >= 0.85 in key_heatmaps,
                            # so the point is also drawn into
                            # hybrid_heatmaps.
                            draw_gaussian(
                                key_heatmaps[b_ind, category],
                                [detection[2 * k], detection[2 * k + 1]],
                                radius)
                            draw_gaussian(
                                hybrid_heatmaps[b_ind, category],
                                [detection[2 * k], detection[2 * k + 1]],
                                radius)
            else:
                # NOTE(review): assumed to pair with ``if gaussian_bump``.
                # ``k`` is not assigned in this branch, so it is either
                # undefined or left over from an earlier loop -- confirm.
                key_heatmaps[b_ind, category, detection[2 * k + 1],
                             detection[2 * k]] = 1
            for k in range(int(len(detection) / 2)):
                if not (detection[2 * k] == 0 or detection[2 * k + 1] == 0
                        or detection[2 * k] >= (output_size[1] - 1e-2)
                        or detection[2 * k + 1] >= (output_size[0] - 1e-2)):
                    # Stop recording this group once either capacity limit
                    # is reached.
                    if tag_lens[
                            b_ind] >= max_tag_len - 1 or k > max_tag_len_group - 1:
                        print("Too many targets, skip!")
                        print(tag_lens[b_ind])
                        print(image_file)
                        break
                    tag_ind = tag_lens[b_ind]
                    # Sub-cell offset lost by the integer truncation above.
                    key_regrs[b_ind, tag_ind, :] = [
                        fdetection[2 * k] - detection[2 * k],
                        fdetection[2 * k + 1] - detection[2 * k + 1]
                    ]
                    # Flattened (row-major) heatmap index of the key point,
                    # stored both in the flat list and in its group row.
                    key_tags[b_ind, tag_ind] = detection[
                        2 * k + 1] * output_size[1] + detection[2 * k]
                    key_tags_grouped[b_ind, ind, k] = detection[
                        2 * k + 1] * output_size[1] + detection[2 * k]
                    tag_lens[b_ind] += 1
                    if hybrid_heatmaps[b_ind, category, detection[2 * k + 1],
                                       detection[2 * k]] < 0.85:
                        tag_masks_grouped[b_ind, ind, k] = 1
            # NOTE(review): these three statements are assumed to run once
            # per detection (group) -- confirm the intended nesting level.
            tag_len = tag_lens[b_ind]
            tag_group_lens[b_ind] += 1
            tag_masks[b_ind, :tag_len] = 1
    # hybrid_masks_grouped is all zeros here, so this multiplies by 1.
    tag_masks_grouped = tag_masks_grouped * (1 - hybrid_masks_grouped)
    images = torch.from_numpy(images)
    key_heatmaps = torch.from_numpy(key_heatmaps)
    key_regrs = torch.from_numpy(key_regrs)
    key_tags = torch.from_numpy(key_tags)
    tag_masks = torch.from_numpy(tag_masks)
    key_tags_grouped = torch.from_numpy(key_tags_grouped)
    tag_group_lens = torch.from_numpy(tag_group_lens)
    hybrid_heatmaps = torch.from_numpy(hybrid_heatmaps)
    tag_masks_grouped = torch.from_numpy(tag_masks_grouped)
    return {
        "xs": [images, key_tags, key_tags_grouped, tag_group_lens],
        "ys": [
            key_heatmaps, hybrid_heatmaps, tag_masks, tag_masks_grouped,
            key_regrs
        ]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one training batch of positive/negative sample-point tags.

    For each slot of the batch an image with at least one positive
    detection is read from ``db``, cropped/resized/clipped and optionally
    colour-augmented. Each positive and negative detection is then turned
    into sample points by ``_get_sample_point`` and written as flattened
    heatmap indices plus weights.

    Args:
        db: Dataset object. Reads ``db.configs``, ``db.db_inds``,
            ``db.eig_val`` and ``db.eig_vec``; calls ``db.shuffle_inds``,
            ``db.image_file`` and ``db.detections`` (which must return
            ``(ps_detections, ng_detections)``).
        k_ind: Position in ``db.db_inds`` of the first sample to read.
        data_aug: Gates the ``lighting`` and ``rand_color`` config flags.
        debug: When true, shuffling, random cropping and colour
            augmentation are skipped.

    Returns:
        A tuple ``(batch, k_ind)``. ``batch["xs"]`` is ``[images, ps_tags,
        ng_tags, ps_weights, ng_weights]``; ``batch["ys"]`` is an int64
        zeros tensor and an int64 ones tensor, both of shape
        ``(batch_size, 16)``, followed by ``tag_masks_ps`` and
        ``tag_masks_ng``.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    border = db.configs["border"]
    lighting = db.configs["lighting"] and data_aug
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"] and data_aug
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    # Capacity limits: detections kept per image, and the number passed to
    # _get_sample_point for each detection.
    max_tag_len = 16
    max_group_len = 16
    num_feature = 8
    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    # NOTE(review): the ``8 * 4`` factor presumably means num_feature
    # sample points per detection with 4 entries each -- confirm.
    ps_tags = np.zeros((batch_size, max_tag_len * 8 * 4), dtype=np.int64)
    ng_tags = np.zeros((batch_size, max_tag_len * 8 * 4), dtype=np.int64)
    ps_weights = np.zeros((batch_size, max_tag_len * 8 * 4), dtype=np.float32)
    ng_weights = np.zeros((batch_size, max_tag_len * 8 * 4), dtype=np.float32)
    tag_masks_ps = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_masks_ng = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()
        # Keep drawing samples until one still has positive detections
        # after cropping and clipping.
        # NOTE(review): the extent of this loop body is assumed from the
        # ``continue`` statements and the final ``flag = False`` -- confirm.
        flag = False
        while not flag:
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size
            # reading image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            # NOTE(review): ``image.any()`` yields a bool, so ``!= None`` is
            # always true for an array; if ``image`` itself is None this
            # line raises AttributeError. Presumably ``image is not None``
            # was intended -- confirm.
            if image.any() != None:
                flag = True
            (ps_detections, ng_detections) = db.detections(db_ind)
            if ps_detections is None:
                flag = False
                continue
            if len(ps_detections) < 1:
                flag = False
                continue
            ori_size = image.shape
            ps_detections = np.array(ps_detections)
            ng_detections = np.array(ng_detections)
            # cropping an image randomly
            if not debug and rand_crop:
                image, ps_detections, ng_detections, scale = random_crop_line(
                    image,
                    ps_detections,
                    ng_detections,
                    rand_scales,
                    input_size,
                    border=border)
            else:
                # NOTE(review): ``detections`` is not defined anywhere in
                # this function, so this branch raises NameError -- confirm
                # the intended arguments.
                image, detections = _full_image_crop(image, detections)
            image, ps_detections, ng_detections = _resize_image(
                image, ps_detections, ng_detections, input_size)
            ps_detections, ng_detections = _clip_detections(
                image, ps_detections, ng_detections)
            # Retry with the next sample if clipping removed every positive.
            if len(ps_detections) < 1:
                flag = False
        # Randomly keep at most max_group_len detections of each kind.
        np.random.shuffle(ps_detections)
        np.random.shuffle(ng_detections)
        ps_detections = ps_detections[0:max_group_len]
        ng_detections = ng_detections[0:max_group_len]
        # Scale factors from input-image pixels to heatmap cells.
        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]
        if not debug:
            image = image.astype(np.float32) / 255.
            # NOTE(review): nesting of ``lighting`` under ``rand_color`` is
            # assumed -- confirm.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            # normalize_(image, db.mean, db.std)  (normalization disabled)
        images[b_ind] = image.transpose((2, 0, 1))
        # Scale the detections into heatmap coordinates; the three-index
        # form shows each array is 3-D with x at [..., 0] and y at [..., 1].
        ps_fdetections = ps_detections.copy()
        ps_fdetections[:, :, 0] = ps_detections[:, :, 0] * width_ratio
        ps_fdetections[:, :, 1] = ps_detections[:, :, 1] * height_ratio
        ng_fdetections = ng_detections.copy()
        if len(ng_detections) > 0:
            ng_fdetections[:, :, 0] = ng_detections[:, :, 0] * width_ratio
            ng_fdetections[:, :, 1] = ng_detections[:, :, 1] * height_ratio
        # Positive detections: 4 (tag, weight) entries per sample point.
        tag_ind = 0
        for k in range(len(ps_detections)):
            sp = ps_fdetections[k, 0]
            ep = ps_fdetections[k, 1]
            p_points, p_weights = _get_sample_point(sp, ep, num_feature)
            for kth in range(len(p_points)):
                p_point = p_points[kth]
                p_weight = p_weights[kth]
                for sth in range(4):
                    # Flattened (row-major) heatmap index of the entry.
                    ps_tags[b_ind, tag_ind +
                            sth] = p_point[sth][1] * output_size[1] + p_point[
                                sth][0]
                    ps_weights[b_ind, tag_ind + sth] = p_weight[sth]
                tag_ind += 4
            tag_masks_ps[b_ind, k] = 1
        # Negative detections: same layout as the positives.
        tag_ind = 0
        for k in range(len(ng_detections)):
            sp = ng_fdetections[k, 0]
            ep = ng_fdetections[k, 1]
            n_points, n_weights = _get_sample_point(sp, ep, num_feature)
            for kth in range(len(n_points)):
                n_point = n_points[kth]
                n_weight = n_weights[kth]
                for sth in range(4):
                    ng_tags[b_ind, tag_ind +
                            sth] = n_point[sth][1] * output_size[1] + n_point[
                                sth][0]
                    ng_weights[b_ind, tag_ind + sth] = n_weight[sth]
                tag_ind += 4
            tag_masks_ng[b_ind, k] = 1
    # NOTE(review): the bound 127 * 127 is hard-coded; presumably tied to a
    # 128 x 128 output map -- confirm against output_size.
    ps_tags = np.clip(ps_tags, 0, 127 * 127)
    ng_tags = np.clip(ng_tags, 0, 127 * 127)
    images = torch.from_numpy(images)
    ps_tags = torch.from_numpy(ps_tags)
    ng_tags = torch.from_numpy(ng_tags)
    ps_weights = torch.from_numpy(ps_weights)
    ng_weights = torch.from_numpy(ng_weights)
    tag_masks_ps = torch.from_numpy(tag_masks_ps)
    tag_masks_ng = torch.from_numpy(tag_masks_ng)
    return {
        "xs": [images, ps_tags, ng_tags, ps_weights, ng_weights],
        "ys": [
            torch.zeros([batch_size, 16], dtype=torch.int64),
            torch.ones([batch_size, 16], dtype=torch.int64), tag_masks_ps,
            tag_masks_ng
        ]
    }, k_ind