Ejemplo n.º 1
0
def display_anchors(fmap_w, fmap_h, s):
    """Plot anchors of scale `s` for an (fmap_h, fmap_w) feature map on the
    global image `img`, then save the figure."""
    d2l.set_figsize((3.5, 2.5))
    fig = plt.imshow(img)

    # MultiBoxPrior only needs the spatial (h, w) size of the feature map here.
    anchors = d2l.MultiBoxPrior((fmap_h, fmap_w), sizes=s, ratios=[1, 2, 0.5])

    # Anchors come back normalized to [0, 1]; scale to pixel coordinates
    # (w, h are the globals describing the displayed image).
    pixel_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(fig.axes, anchors * pixel_scale)
    plt.savefig('multiscale_anchor_{}_{}.png'.format(fmap_h, fmap_w))
Ejemplo n.º 2
0
def display_anchors(fmap_w, fmap_h, s):
    """Draw anchors of scale `s`, generated from an (fmap_h, fmap_w) feature
    map, uniformly spread over the global image `img`."""
    # Only the last two dims (h, w) matter to MultiBoxPrior; batch/channel
    # are dummies. (The book used (1, 10, fmap_w, fmap_h) — h/w swapped.)
    fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)

    # Shift every anchor by half a cell so boxes are centered on cells.
    step_x, step_y = 1.0 / fmap_w, 1.0 / fmap_h
    half_cell = torch.tensor([step_x / 2, step_y / 2, step_x / 2, step_y / 2])
    anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5]) + half_cell

    # Coordinates are normalized; map them back to pixels before drawing.
    pixel_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * pixel_scale)
Ejemplo n.º 3
0
def display(img, output):
    """Draw each detection row of `output` on `img`, labelled with its
    confidence, and save the figure as 'pikachu_detect.png'.

    Each row is assumed to be [class, confidence, x1, y1, x2, y2] with
    normalized coordinates — TODO confirm against the producer of `output`.
    """
    d2l.set_figsize((5, 5))
    fig = d2l.plt.imshow(img)
    h, w = img.shape[0:2]
    bbox_scale = torch.tensor([w, h, w, h], dtype=torch.float32)
    # Columns 2:6 hold the corner coordinates; column 1 the confidence.
    bbox_list = [row[2:6] * bbox_scale for row in output]
    labels_list = ['%.2f' % row[1] for row in output]
    d2l.show_bboxes(fig.axes, bbox_list, labels_list, 'w')
    plt.savefig('pikachu_detect.png')
def display_anchors(fmap_w, fmap_h, s):
    # NOTE(review): this block appears to be a scrape artifact — the tail
    # (val_iter / return below) belongs to a data-loading function, not to
    # display_anchors. Code left byte-identical; only comments translated.
    # The first two dims do not affect the result. (The book used
    # (1, 10, fmap_w, fmap_h); I believe that was a mistake — h/w swapped.)
    fmap = torch.zeros((1, 10, fmap_h, fmap_w), dtype=torch.float32)

    # Shift all anchors so they are spread uniformly over the image.
    offset_x, offset_y = 1.0 / fmap_w, 1.0 / fmap_h
    anchors = d2l.MultiBoxPrior(fmap, sizes=s, ratios=[
        1, 2, 0.5
    ]) + torch.tensor([offset_x / 2, offset_y / 2, offset_x / 2, offset_y / 2])
    # d2l.MultiBoxPrior: given an input (fmap), a set of sizes and a set of
    # aspect ratios, returns all anchor boxes for that input.
    # (The string below is the author's original explanation, kept verbatim:
    # anchors have shape (1, h*w*num_ratios, 4) with normalized coordinates,
    # which is why they appear uniformly spread when drawn on the real image.)
    '''
    这里之所以说会均匀采样,是因为在图像位置标示值中都采用了归一化,及所有图像上的位置都可以用两个0到1的数表示。
    通过
    anchors=d2l.MultiBoxPrior(fmap,sizes=s,ratios=[1,2,0.5])+torch.tensor([offset_x/2,offset_y/2,offset_x/2,offset_y/2])
    得到的Anchors是针对fmap的anchor,其形状为1,fmap的像素高宽乘积再乘上设定的锚框高宽比长度,4
    其实就是返回fmap的像素高宽乘积再乘上设定的锚框高宽比长度个锚框,每个锚框包含4个坐标,坐标值为归一化之后的值
    在后面绘制目标图像(非fmap)时,因为采用的是归一化位置大小来表示锚框位置,所以本来在fmap上紧密排列的锚框被均匀分布了
    '''

    # Map normalized coordinates back to pixels (w, h are globals).
    bbox_scale = torch.tensor([[w, h, w, h]], dtype=torch.float32)
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * bbox_scale)
    # NOTE(review): the lines below reference val_dataset / batch_size /
    # train_iter, none of which exist in this function — almost certainly
    # fused in from load_data_pikachu during extraction.
    val_iter = torch.utils.data.DataLoader(val_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=4)
    return train_iter, val_iter


if __name__ == '__main__':
    # The __main__ guard is required because load_data_pikachu builds
    # DataLoaders with num_workers=4: on platforms that spawn worker
    # processes, importing this module from a worker would re-run the top
    # level and raise. (The author notes num_workers=1 did not help either.)
    batch_size, edge_size = 32, 256
    train_iter, _ = load_data_pikachu(batch_size, edge_size, data_dir)
    # BUG FIX: Python 3 iterators have no .next() method (that was the
    # Python 2 protocol); use the builtin next() instead.
    batch = next(iter(train_iter))
    print(batch["image"].shape, batch["label"].shape)
    print("*" * 50)
    # permute NCHW -> NHWC so the images can be displayed by matplotlib.
    imgs = batch["image"][0:10].permute(0, 2, 3, 1)
    # Drop the first label column; the rest are box coordinates
    # (presumably column 0 is the class id — confirm against the dataset).
    bboxes = batch["label"][0:10, 0, 1:]

    axes = d2l.show_images(imgs, 2, 5).flatten()
    for ax, bb in zip(axes, bboxes):
        # Boxes are normalized; scale by the image edge to get pixels.
        d2l.show_bboxes(ax, [bb * edge_size], colors=['R'])
    plt.show()
print("*" * 50)
Ejemplo n.º 6
0
    # NOTE(review): truncated fragment — the enclosing `for file_name in ...`
    # header (and the img_name assignment) were lost in extraction; the
    # `break` below stops after the first iteration, so this only smoke-tests
    # parse_voc_xml on one annotation file.
    xml_name = file_name + '.xml'
    img_path = os.path.join(img_dir, img_name)
    xml_path = os.path.join(anno_dir, xml_name)
    # parse_voc_xml presumably returns (object names, bounding boxes) from
    # the VOC annotation — defined elsewhere; confirm against its source.
    object_name_list, bbox_list = parse_voc_xml(xml_path)
    print(object_name_list)
    print(bbox_list)
    break

# --------------------------------------------
# Visualize the VOC dataset: render the ground-truth boxes of the first
# `visual_cnt` images and save each figure under `voc_visual_dir`.
voc_visual_dir = 'visual_voc'
visual_cnt = 50

visual_cnt = min(visual_cnt, len(file_name_list))
if not os.path.exists(voc_visual_dir):
    os.mkdir(voc_visual_dir)
for stem in file_name_list[:visual_cnt]:
    jpg_name = stem + '.jpg'
    image_path = os.path.join(img_dir, jpg_name)
    annotation_path = os.path.join(anno_dir, stem + '.xml')
    save_path = os.path.join(voc_visual_dir, jpg_name)

    names, boxes = parse_voc_xml(annotation_path)

    # Draw the annotated boxes on the image and write the figure to disk;
    # close each figure so the loop does not accumulate open figures.
    canvas = plt.imshow(plt.imread(image_path))
    d2l.show_bboxes(canvas.axes, boxes, names)
    plt.savefig(save_path)
    plt.close()
Ejemplo n.º 7
0
                      color=text_color,
                      bbox=dict(facecolor=color, lw=0))


# This function is saved in the d2lzh_pytorch package for later use
def show_images(imgs, num_rows, num_cols, scale=2):
    """Show `imgs` on a num_rows x num_cols grid and return the axes array.

    BUG FIX: the original indexed `axes[i][j]` directly, which crashes when
    num_rows == 1 or num_cols == 1 because plt.subplots squeezes the array
    to 1-D (or a bare Axes). `squeeze=False` keeps it 2-D; for multi-row,
    multi-col calls the returned value is unchanged, so callers that do
    `axes.flatten()` or `axes[i][j]` still work.
    """
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            # Hide tick marks/labels: these cells show images, not plots.
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes


# Render the first ten training samples on a 2x5 grid with their
# ground-truth boxes, then save the figure.
imgs = [train_dataset[i][0].permute(1, 2, 0) for i in range(10)]
labels = [torch.Tensor(train_dataset[i][1]).unsqueeze(0) for i in range(10)]

show_num_rows = 2
show_num_cols = 5
axes = d2l.show_images(imgs, show_num_rows, show_num_cols, scale=2)

# Overlay each box (normalized coordinates, so scale by the 256-pixel
# image edge) on its corresponding grid cell, row-major order.
for idx, label in enumerate(labels):
    ax = axes[idx // show_num_cols][idx % show_num_cols]
    d2l.show_bboxes(ax, [label.squeeze(0) * 256], colors=['r'])
plt.savefig('visual_pikachu_dataset.png')
Ejemplo n.º 8
0
        for bb in pred_bb_info:
            output.append([(bb.class_id if bb.index in obj_bb_idx else -1.0),
                           bb.confidence, *bb.xyxy])

        return torch.tensor(output)  # shape: (锚框个数, 6)

    batch_output = []
    for b in range(bn):
        batch_output.append(
            MultiBoxDetection_one(cls_prob[b], loc_pred[b], anchor[0],
                                  nms_threshold))

    return torch.stack(batch_output)


print("test MultiBoxDetection function")
output = MultiBoxDetection(cls_probs.unsqueeze(dim=0),
                           offset_preds.unsqueeze(dim=0),
                           anchors.unsqueeze(dim=0),
                           nms_threshold=0.5)
print(output)

d2l.set_figsize((3.5, 2.5))
fig = plt.imshow(img)
# Each row: [class_id (-1 means suppressed by NMS), confidence, x1, y1, x2, y2].
for i in output[0].detach().cpu().numpy():
    if i[0] == -1:
        continue
    label = ('dog=', 'cat=')[int(i[0])] + str(i[1])
    # BUG FIX: show_bboxes iterates over its `bboxes` argument, so a single
    # box must be wrapped in a list — a bare 1-D tensor would be enumerated
    # element-by-element and fail inside bbox_to_rect.
    d2l.show_bboxes(fig.axes, [torch.tensor(i[2:]) * bbox_scale], label)
plt.savefig('predict_after_iou.png')