def display_anchors(fmap_w, fmap_h, s):
    d2l.set_figsize((3.5, 2.5))
    fig = plt.imshow(img)
    feature_map_size = (fmap_h, fmap_w)  # (h, w)
    # Note: this variant assumes a MultiBoxPrior that accepts the feature-map
    # size directly; the d2lzh_pytorch version expects a feature-map tensor
    # instead (see the variants below).
    anchors = d2l.MultiBoxPrior(feature_map_size, sizes=s, ratios=[1, 2, 0.5])
    bbox_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(fig.axes, anchors * bbox_scale)
    plt.savefig('multiscale_anchor_{}_{}.png'.format(fmap_h, fmap_w))
def display_anchors(fmap_w, fmap_h, s):
    # The values of the first two dimensions do not affect the output
    # (the original book uses (1, 10, fmap_w, fmap_h), which I believe is wrong).
    fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)
    # Shift all anchor boxes so they are distributed evenly over the image.
    offset_x, offset_y = 1.0 / fmap_w, 1.0 / fmap_h
    anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5]) + \
        torch.tensor([offset_x / 2, offset_y / 2, offset_x / 2, offset_y / 2])
    bbox_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * bbox_scale)
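# Illustrative calls of display_anchors in the spirit of the book's multiscale
# experiment (the feature-map sizes and anchor sizes below are assumptions, not
# taken from these notes): small anchors sampled densely on a fine feature map,
# then progressively larger anchors sampled more coarsely.
display_anchors(fmap_w=4, fmap_h=4, s=[0.15])
display_anchors(fmap_w=2, fmap_h=2, s=[0.4])
display_anchors(fmap_w=1, fmap_h=1, s=[0.8])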
def display(img, output):
    d2l.set_figsize((5, 5))
    fig = d2l.plt.imshow(img)
    h, w = img.shape[0:2]
    bbox_scale = torch.tensor([w, h, w, h], dtype=torch.float32)
    bbox_list = []
    labels_list = []
    for row in output:
        bbox_list.append(row[2:6] * bbox_scale)
        labels_list.append('%.2f' % row[1])
    d2l.show_bboxes(fig.axes, bbox_list, labels_list, 'w')
    plt.savefig('pikachu_detect.png')
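# Hypothetical usage sketch: display() draws every row it receives, so detections
# can be filtered by class and confidence beforehand. This assumes `output` holds
# the per-image rows (class, confidence, x1, y1, x2, y2) produced earlier in the
# notes, and the threshold value is an assumption.
threshold = 0.3
kept = [row for row in output[0].detach().cpu()
        if row[0] != -1 and row[1] > threshold]
display(img, kept)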
def display_anchors(fmap_w, fmap_h, s):
    # The values of the first two dimensions do not affect the output
    # (the original book uses (1, 10, fmap_w, fmap_h), which I believe is wrong).
    fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)
    # Shift all anchor boxes so they are distributed evenly over the image.
    offset_x, offset_y = 1.0 / fmap_w, 1.0 / fmap_h
    anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5]) + \
        torch.tensor([offset_x / 2, offset_y / 2, offset_x / 2, offset_y / 2])
    # d2l.MultiBoxPrior: given an input (fmap), a set of sizes, and a set of
    # aspect ratios, it returns all anchor boxes for that input.
    '''
    The sampling is uniform because all positions are normalized: any location
    on the image can be expressed by two numbers in [0, 1].
    The anchors obtained from
        anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5]) \
                  + torch.tensor([offset_x/2, offset_y/2, offset_x/2, offset_y/2])
    are anchors for fmap, with shape
        (1, fmap_h * fmap_w * number_of_size/ratio_combinations, 4),
    i.e. one anchor per fmap pixel per configured size/ratio combination, each
    anchor holding 4 normalized coordinates.
    When the target image (not fmap) is drawn later, the anchor positions are
    given by these normalized coordinates, so the anchors that were densely
    packed on fmap end up spread uniformly over the image.
    '''
    bbox_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * bbox_scale)
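# A small sanity check of the shape described in the docstring above, assuming
# the d2lzh_pytorch MultiBoxPrior, which generates len(sizes) + len(ratios) - 1
# anchors per feature-map pixel (the sizes/ratios here are illustrative values).
fmap_h, fmap_w = 4, 4
sizes, ratios = [0.15], [1, 2, 0.5]
fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)
anchors = d2l.MultiBoxPrior(fmap, sizes=sizes, ratios=ratios)
anchors_per_pixel = len(sizes) + len(ratios) - 1
assert anchors.shape == (1, fmap_h * fmap_w * anchors_per_pixel, 4)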
    val_iter = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                           shuffle=False, num_workers=4)
    return train_iter, val_iter


if __name__ == '__main__':
    # Without the `if __name__ == '__main__':` guard before the main program,
    # a worker-process error is raised.
    # Reading the code in load_data_pikachu() above shows why: train_iter and
    # val_iter are created with num_workers=4. If the guard is not added, you
    # could try changing num_workers=4 to 1.
    # I tried it: even with num_workers changed to 1 it still fails, which is odd.
    batch_size, edge_size = 32, 256
    train_iter, _ = load_data_pikachu(batch_size, edge_size, data_dir)
    batch = next(iter(train_iter))
    print(batch["image"].shape, batch["label"].shape)
    print("*" * 50)
    imgs = batch["image"][0:10].permute(0, 2, 3, 1)
    # .permute() reorders dimensions; here the original order (0, 1, 2, 3)
    # becomes (0, 2, 3, 1), i.e. NCHW -> NHWC.
    bboxes = batch["label"][0:10, 0, 1:]
    axes = d2l.show_images(imgs, 2, 5).flatten()
    for ax, bb in zip(axes, bboxes):
        d2l.show_bboxes(ax, [bb * edge_size], colors=['r'])
    plt.show()
    print("*" * 50)
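# A minimal sketch of the workaround discussed above (assumption: the error comes
# from worker processes re-importing the module, as happens on Windows). Any
# num_workers >= 1 spawns worker processes and therefore needs the
# `if __name__ == '__main__':` guard; num_workers=0 loads data in the main
# process and avoids the error entirely, at the cost of loading speed.
# The dataset names follow the loader above.
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=0)
val_iter = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=0)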
    xml_name = file_name + '.xml'
    img_path = os.path.join(img_dir, img_name)
    xml_path = os.path.join(anno_dir, xml_name)
    object_name_list, bbox_list = parse_voc_xml(xml_path)
    print(object_name_list)
    print(bbox_list)
    break

# --------------------------------------------
# Visualize the VOC dataset
voc_visual_dir = 'visual_voc'
visual_cnt = 50
visual_cnt = min(visual_cnt, len(file_name_list))
if not os.path.exists(voc_visual_dir):
    os.mkdir(voc_visual_dir)
for file_name in file_name_list[:visual_cnt]:
    img_name = file_name + '.jpg'
    xml_name = file_name + '.xml'
    img_path = os.path.join(img_dir, img_name)
    xml_path = os.path.join(anno_dir, xml_name)
    visual_img_save_path = os.path.join(voc_visual_dir, img_name)
    object_name_list, bbox_list = parse_voc_xml(xml_path)
    img = plt.imread(img_path)
    fig = plt.imshow(img)
    d2l.show_bboxes(fig.axes, bbox_list, object_name_list)
    plt.savefig(visual_img_save_path)
    plt.close()
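# The visualization above relies on parse_voc_xml, whose definition is not part
# of this excerpt. A minimal sketch, assuming the standard Pascal VOC annotation
# layout and pixel-coordinate boxes returned as [xmin, ymin, xmax, ymax]:
import xml.etree.ElementTree as ET


def parse_voc_xml(xml_path):
    """Return (object names, [xmin, ymin, xmax, ymax] boxes) from a VOC annotation."""
    root = ET.parse(xml_path).getroot()
    object_name_list, bbox_list = [], []
    for obj in root.iter('object'):
        object_name_list.append(obj.find('name').text)
        bndbox = obj.find('bndbox')
        bbox_list.append([float(bndbox.find(tag).text)
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
    return object_name_list, bbox_list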
                      color=text_color, bbox=dict(facecolor=color, lw=0))


# This function is saved in the d2lzh_pytorch package for later use.
def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes


imgs = [train_dataset[i][0].permute(1, 2, 0) for i in range(10)]
labels = [torch.Tensor(train_dataset[i][1]).unsqueeze(0) for i in range(10)]
show_num_rows = 2
show_num_cols = 5
axes = d2l.show_images(imgs, show_num_rows, show_num_cols, scale=2)
for i in range(show_num_rows):
    for j in range(show_num_cols):
        index = i * show_num_cols + j
        ax = axes[i][j]
        label = labels[index]
        d2l.show_bboxes(ax, [label.squeeze(0) * 256], colors=['r'])
plt.savefig('visual_pikachu_dataset.png')
        for bb in pred_bb_info:
            output.append([(bb.class_id if bb.index in obj_bb_idx else -1.0),
                           bb.confidence, *bb.xyxy])
        return torch.tensor(output)  # shape: (number of anchor boxes, 6)

    batch_output = []
    for b in range(bn):
        batch_output.append(
            MultiBoxDetection_one(cls_prob[b], loc_pred[b], anchor[0],
                                  nms_threshold))
    return torch.stack(batch_output)


print("test MultiBoxDetection function")
output = MultiBoxDetection(cls_probs.unsqueeze(dim=0),
                           offset_preds.unsqueeze(dim=0),
                           anchors.unsqueeze(dim=0),
                           nms_threshold=0.5)
print(output)

d2l.set_figsize((3.5, 2.5))
fig = plt.imshow(img)
for i in output[0].detach().cpu().numpy():
    if i[0] == -1:
        continue
    label = ('dog=', 'cat=')[int(i[0])] + str(i[1])
    d2l.show_bboxes(fig.axes, [torch.tensor(i[2:]) * bbox_scale], label)
plt.savefig('predict_after_iou.png')
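# For context, the test tensors used above (cls_probs, offset_preds, anchors,
# bbox_scale) are defined earlier in the notes and are not part of this excerpt.
# Shape-wise they would look like the following stand-ins (values are
# assumptions, not the original data).
num_anchors, num_classes = 4, 2                        # e.g. dog and cat
anchors = torch.rand(num_anchors, 4)                   # normalized (x1, y1, x2, y2)
offset_preds = torch.zeros(num_anchors * 4)            # predicted box offsets
cls_probs = torch.rand(num_classes + 1, num_anchors)   # row 0 is background
bbox_scale = torch.tensor([w, h, w, h], dtype=torch.float32)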