val_iter = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4) return train_iter, val_iter if __name__ == '__main__': # 如果不用字主程序前加上if __name__ == '__main__':会导致线程报错 # 其实通过阅读上面的load_data_pikachu()函数下的代码可以知道,报错的原因是: # 在设定train_iter和val_iter时线程数设定为了4,如果不加入if __name__ == '__main__': # 可以将num_workers=4改为等于1即可 # 试了一下,就算num_workers=4改为等于1还是不行,这就比较奇怪了。 batch_size, edge_size = 32, 256 train_iter, _ = load_data_pikachu(batch_size, edge_size, data_dir) batch = iter(train_iter).next() print(batch["image"].shape, batch["label"].shape) print("*" * 50) imgs = batch["image"][0:10].permute(0, 2, 3, 1) # .permute()表示维度换位, # 此案例中便是将之前的维度位置0维,1维,2维,3维换为如下排列0维,2维,3维,1维 bboxes = batch["label"][0:10, 0, 1:] axes = d2l.show_images(imgs, 2, 5).flatten() # a = zip(axes, bboxes) # b = list(a) for ax, bb in zip(axes, bboxes): d2l.show_bboxes(ax, [bb * edge_size], colors=['R']) plt.show() print("*" * 50)
def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    """Apply ``aug`` to ``img`` repeatedly and show the results in a grid.

    ``aug`` is called once per grid cell (``num_rows * num_cols`` times) on the
    same ``img``. The collected outputs are handed to ``d2l.show_images``
    together with the grid dimensions and ``scale``. Nothing is returned.
    """
    cell_count = num_rows * num_cols
    augmented = []
    for _ in range(cell_count):
        augmented.append(aug(img))
    d2l.show_images(augmented, num_rows, num_cols, scale)
os.environ["CUDA_VISIBLE_DEVICES"] = "0" device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(torch.__version__) print(device) # hotdog dataset data_dir = './hotdog' print(os.listdir(data_dir)) # 使用 ImageFolder 实例来读取文件 train_imgs = ImageFolder(os.path.join(data_dir, 'train')) test_imgs = ImageFolder(os.path.join(data_dir, 'test')) hotdogs = [train_imgs[i][0] for i in range(8)] # 前 8 张正样本 not_hotdogs = [train_imgs[-i - 1][0] for i in range(8)] # 最后 8 张负样本 d2l.show_images(hotdogs + not_hotdogs, 2, 8, scale=1.4, title='hotdog dataset') # 在使用预训练模型时,一定要和预训练时作同样的预处理 # 要仔细阅读 pretrained-models 的说明,看其是如何预处理的 # 指定RGB三个通道的均值和方差来将图像通道归一化 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 训练时,先从图像中裁剪出随机大小和随机高宽比的一块随机区域,然后将该区域缩放为高和宽均为224像素的输入 train_augs = transforms.Compose([ transforms.RandomResizedCrop(size=224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ]) # 测试时,将图像的高和宽均缩放为256像素,然后从中裁剪出高和宽均为224像素的中心区域作为输入 test_augs = transforms.Compose([ transforms.Resize(size=256),
if max_num is not None: images = images[:min(max_num, len(images))] features, labels = [None] * len(images), [None] * len(images) for i, fname in tqdm(enumerate(images)): # tqdm主要作用是用于显示进度 features[i] = Image.open('%s/JPEGImages/%s.jpg' % (root, fname)).convert("RGB") labels[i] = Image.open('%s/SegmentationClass/%s.png' % (root, fname)).convert("RGB") return features, labels # PIL image voc_dir = "F:/PyCharm/Learning_pytorch/data/VOCdevkit/VOC2012" train_features, train_labels = read_voc_images(voc_dir, max_num=100) n = 5 imgs = train_features[0:n] + train_labels[0:n] d2l.show_images(imgs, 2, n) plt.show() # 列出标签中每个RGB颜色的值及其标注的类别 # 本函数已保存在d2lzh_pytorch中方便以后使用 VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] # 本函数已保存在d2lzh_pytorch中方便以后使用 VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']
color=text_color, bbox=dict(facecolor=color, lw=0)) # 本函数已保存在dd2lzh_pytorch包中方便以后使用 def show_images(imgs, num_rows, num_cols, scale=2): figsize = (num_cols * scale, num_rows * scale) _, axes = plt.subplots(num_rows, num_cols, figsize=figsize) for i in range(num_rows): for j in range(num_cols): axes[i][j].imshow(imgs[i * num_cols + j]) axes[i][j].axes.get_xaxis().set_visible(False) axes[i][j].axes.get_yaxis().set_visible(False) return axes imgs = [train_dataset[i][0].permute(1, 2, 0) for i in range(10)] labels = [torch.Tensor(train_dataset[i][1]).unsqueeze(0) for i in range(10)] show_num_rows = 2 show_num_cols = 5 axes = d2l.show_images(imgs, show_num_rows, show_num_cols, scale=2) for i in range(show_num_rows): for j in range(show_num_cols): index = i * show_num_cols + j ax = axes[i][j] label = labels[index] d2l.show_bboxes(ax, [label.squeeze(0) * 256], colors=['r']) plt.savefig('visual_pikachu_dataset.png')
import sys sys.path.append("..") import d2lzh_pytorch as d2l device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') data_dir = 'D:/Program/Pytorch/Datasets' os.listdir(os.path.join(data_dir, "hotdog")) # ['train', 'test'] train_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/train')) test_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/test')) hotdogs = [train_imgs[i][0] for i in range(8)] not_hotdogs = [train_imgs[-i - 1][0] for i in range(8)] d2l.show_images(hotdogs + not_hotdogs, 2, 8, scale=1.4) plt.show() # 在训练时,我们先从图像中裁剪出随机大小和随机高宽比的一块随机区域,然后将该区域缩放为高和宽均为224像素的输入。 # 测试时,我们将图像的高和宽均缩放为256像素,然后从中裁剪出高和宽均为224像素的中心区域作为输入。 # 此外,我们对RGB(红、绿、蓝)三个颜色通道的数值做标准化: # 每个数值减去该通道所有数值的平均值,再除以该通道所有数值的标准差作为输出。 # 指定RGB三个通道的均值和方差来将图像通道归一化 # image=(image-mean)/std # mean和std分别通过[0.485, 0.456, 0.406];[0.229, 0.224, 0.225]进行指定 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 满足和预训练时作同样的预处理要求 train_augs = transforms.Compose([
contrast=0.5, saturation=0.5, hue=0.5) apply(img, color_aug) # Composing multiple augmentations augs = torchvision.transforms.Compose( [torchvision.transforms.RandomHorizontalFlip(), color_aug, shape_aug]) apply(img, augs) # 使用 CIFAR10 数据集 all_imges = torchvision.datasets.CIFAR10(train=True, root="./CIFAR", download=True) # all_imges的每一个元素都是(image, label) d2l.show_images([all_imges[i][0] for i in range(32)], 4, 8, scale=0.8) # 训练集合上使用 flip flip_aug = torchvision.transforms.Compose([ torchvision.transforms.RandomHorizontalFlip(), torchvision.transforms.ToTensor() ]) # 测试集上不使用 augmentation no_aug = torchvision.transforms.Compose([torchvision.transforms.ToTensor()]) # 检查操作系统平台 num_workers = 0 if sys.platform.startswith('win32') else 4 # 读取数据集,返回一个 DataLoader
return features, labels # PIL image voc_dir = "./Data/VOCdevkit/VOC2012" train_features, train_labels = read_voc_images(voc_dir, max_num=100) ''' 我们画出前5张输入图像和它们的标签。在标签图像中,白色和黑色分别代表边框和背景,而其他不同的颜色则对应不同的类别。 ''' n = 5 imgs = train_features[0:n] + train_labels[0:n] # print(train_features[0:n]) # print('-'*100) # print(train_labels[0:n]) # print('-'*100) # print(imgs) d2l.show_images(imgs, 2, n) ''' 接下来,我们列出标签中每个RGB颜色的值及其标注的类别。 ''' # 本函数已保存在d2lzh_pytorch中方便以后使用 VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] # 本函数已保存在d2lzh_pytorch中方便以后使用 VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',