    val_iter = torch.utils.data.DataLoader(val_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=4)
    return train_iter, val_iter


if __name__ == '__main__':
    # Without `if __name__ == '__main__':` in front of the main code, the
    # DataLoader worker processes raise errors.
    # Reading the code in load_data_pikachu() above shows why: train_iter and
    # val_iter are created with num_workers=4, and on Windows each worker
    # process re-imports the main module, which recurses without this guard.

    # Note: lowering num_workers from 4 to 1 does not fix it, because
    # num_workers=1 still spawns a worker process; only num_workers=0 loads
    # data in the main process and avoids multiprocessing entirely.
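    # A Windows-safe alternative (a sketch, mirroring the platform check used
    # in a later snippet):
    #   num_workers = 0 if sys.platform.startswith('win') else 4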
    batch_size, edge_size = 32, 256
    train_iter, _ = load_data_pikachu(batch_size, edge_size, data_dir)
    batch = next(iter(train_iter))  # iter(...).next() is Python 2 only; use next() in Python 3
    print(batch["image"].shape, batch["label"].shape)
    print("*" * 50)
    imgs = batch["image"][0:10].permute(0, 2, 3, 1)
    # .permute() reorders dimensions: here the original order (0, 1, 2, 3)
    # becomes (0, 2, 3, 1), i.e. (N, C, H, W) -> (N, H, W, C)
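    # e.g. (illustrative shapes) a batch of shape (10, 3, 256, 256) in
    # (N, C, H, W) order becomes (10, 256, 256, 3), the channels-last layout
    # that matplotlib's imshow expects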
    bboxes = batch["label"][0:10, 0, 1:]

    axes = d2l.show_images(imgs, 2, 5).flatten()
    # a = zip(axes, bboxes)
    # b = list(a)
    for ax, bb in zip(axes, bboxes):
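        # the labels hold corner coordinates normalized to [0, 1]; multiply by
        # the image edge length to get pixel coordinates before drawing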
        d2l.show_bboxes(ax, [bb * edge_size], colors=['r'])
    plt.show()
print("*" * 50)
Example #2
def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    d2l.show_images(Y, num_rows, num_cols, scale)
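# Usage sketch (illustrative; assumes `img` is a PIL image loaded elsewhere,
# e.g. with PIL.Image.open): show num_rows * num_cols randomly flipped copies.
# apply(img, torchvision.transforms.RandomHorizontalFlip())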
Example #3
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.__version__)
print(device)

# hotdog dataset
data_dir = './hotdog'
print(os.listdir(data_dir))

# Use ImageFolder instances to read the image files
train_imgs = ImageFolder(os.path.join(data_dir, 'train'))
test_imgs = ImageFolder(os.path.join(data_dir, 'test'))

hotdogs = [train_imgs[i][0] for i in range(8)]  # first 8 positive examples
not_hotdogs = [train_imgs[-i - 1][0] for i in range(8)]  # last 8 negative examples
d2l.show_images(hotdogs + not_hotdogs, 2, 8, scale=1.4, title='hotdog dataset')

# When using a pretrained model, inputs must be preprocessed exactly as they
# were during pretraining; read the pretrained model's documentation carefully
# to see what preprocessing it expects.
# Normalize the image channels with the specified per-channel RGB mean and std
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
# During training, first crop a region of random size and random aspect ratio
# from the image, then scale that region to a 224x224 input
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), normalize
])
# At test time, scale the image's height and width to 256 pixels, then crop out
# the central 224x224 region as the input
test_augs = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(), normalize
])
Example #4
    if max_num is not None:
        images = images[:min(max_num, len(images))]
    features, labels = [None] * len(images), [None] * len(images)
    for i, fname in tqdm(enumerate(images)):
        # tqdm wraps the loop to display progress
        features[i] = Image.open('%s/JPEGImages/%s.jpg' % (root, fname)).convert("RGB")
        labels[i] = Image.open('%s/SegmentationClass/%s.png' % (root, fname)).convert("RGB")
    return features, labels  # lists of PIL images

voc_dir = "F:/PyCharm/Learning_pytorch/data/VOCdevkit/VOC2012"
train_features, train_labels = read_voc_images(voc_dir, max_num=100)


n = 5
imgs = train_features[0:n] + train_labels[0:n]
d2l.show_images(imgs, 2, n)
plt.show()

# List the RGB value of each color used in the labels and the class it marks
# This constant is saved in the d2lzh_pytorch package for later reuse
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
                [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
                [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
                [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
                [0, 64, 128]]
# This constant is saved in the d2lzh_pytorch package for later reuse
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
               'diningtable', 'dog', 'horse', 'motorbike', 'person',
               'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']
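# A common follow-up (a sketch, not part of the original snippet): build a
# lookup table mapping each RGB color to its class index, so a label image can
# be decoded into per-pixel class indices (assumes `import torch`).
colormap2label = torch.zeros(256 ** 3, dtype=torch.uint8)
for i, colormap in enumerate(VOC_COLORMAP):
    # encode (R, G, B) as a single integer R*65536 + G*256 + B
    colormap2label[(colormap[0] * 256 + colormap[1]) * 256 + colormap[2]] = i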
Example #5
                      color=text_color,
                      bbox=dict(facecolor=color, lw=0))


# This function is saved in the d2lzh_pytorch package for later reuse
def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes


imgs = [train_dataset[i][0].permute(1, 2, 0) for i in range(10)]
labels = [torch.Tensor(train_dataset[i][1]).unsqueeze(0) for i in range(10)]

show_num_rows = 2
show_num_cols = 5
axes = d2l.show_images(imgs, show_num_rows, show_num_cols, scale=2)

for i in range(show_num_rows):
    for j in range(show_num_cols):
        index = i * show_num_cols + j
        ax = axes[i][j]
        label = labels[index]
        d2l.show_bboxes(ax, [label.squeeze(0) * 256], colors=['r'])
plt.savefig('visual_pikachu_dataset.png')
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

data_dir = 'D:/Program/Pytorch/Datasets'
os.listdir(os.path.join(data_dir, "hotdog"))  # ['train', 'test']

train_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/train'))
test_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/test'))

hotdogs = [train_imgs[i][0] for i in range(8)]
not_hotdogs = [train_imgs[-i - 1][0] for i in range(8)]
d2l.show_images(hotdogs + not_hotdogs, 2, 8, scale=1.4)
plt.show()

# During training, we first crop a region of random size and random aspect
# ratio from the image, then scale that region to a 224x224 input.
# At test time, we scale the image's height and width to 256 pixels, then crop
# out the central 224x224 region as the input.
# In addition, we standardize each of the RGB (red, green, blue) channels: each
# value has the channel mean subtracted and is divided by the channel's
# standard deviation.

# Normalize the image channels with the specified per-channel RGB mean and std:
# image = (image - mean) / std
# mean and std are specified as [0.485, 0.456, 0.406] and [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
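# e.g. for the red channel, a pixel value of 0.5 becomes
# (0.5 - 0.485) / 0.229 ≈ 0.066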
# This matches the preprocessing requirements used during pretraining

train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize
])
Example #7
# Randomly jitter brightness, contrast, saturation, and hue
# (the first line of this call was cut off; brightness=0.5 is an assumption,
# matching the other jitter factors)
color_aug = torchvision.transforms.ColorJitter(brightness=0.5,
                                               contrast=0.5,
                                               saturation=0.5,
                                               hue=0.5)
apply(img, color_aug)

# Composing multiple augmentations
augs = torchvision.transforms.Compose(
    [torchvision.transforms.RandomHorizontalFlip(), color_aug, shape_aug])
apply(img, augs)
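# Compose applies its transforms in sequence: first the random horizontal flip,
# then color_aug, then shape_aug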

# Use the CIFAR-10 dataset
all_images = torchvision.datasets.CIFAR10(train=True,
                                          root="./CIFAR",
                                          download=True)
# each element of all_images is an (image, label) pair
d2l.show_images([all_images[i][0] for i in range(32)], 4, 8, scale=0.8)

# Use random horizontal flipping on the training set
flip_aug = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor()
])

# Do not use augmentation on the test set
no_aug = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Check the operating system platform
num_workers = 0 if sys.platform.startswith('win32') else 4
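# (On Windows, DataLoader workers are started via "spawn" and re-import the
# main module, hence the __main__ guard requirement; num_workers=0 keeps all
# loading in the main process.)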


# Read the dataset and return a DataLoader
    return features, labels  # PIL image

voc_dir = "./Data/VOCdevkit/VOC2012"
train_features, train_labels = read_voc_images(voc_dir, max_num=100)

'''
We plot the first five input images together with their labels. In a label
image, white and black mark object borders and the background respectively,
while each of the other colors corresponds to a distinct class.
'''
n = 5
imgs = train_features[0:n] + train_labels[0:n]
# print(train_features[0:n])
# print('-'*100)
# print(train_labels[0:n])
# print('-'*100)
# print(imgs)
d2l.show_images(imgs, 2, n)

'''
Next, we list the RGB value of each color in the labels and the class it marks.
'''
# This constant is saved in the d2lzh_pytorch package for later reuse
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
                [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
                [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
                [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
                [0, 64, 128]]
# This constant is saved in the d2lzh_pytorch package for later reuse
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
               'diningtable', 'dog', 'horse', 'motorbike', 'person',