def __init__(self, root, lmdir, maskdir, cmaskdir, mode="test", load_h=512, load_w=512):
    super().__init__()
    transform_ = [
        transform.Resize((load_h, load_w), Image.BICUBIC),
        transform.ImageNormalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
    self.transform = transform.Compose(transform_)
    transform_mask_ = [
        transform.Resize((load_h, load_w), Image.BICUBIC),
        transform.Gray(),
    ]
    self.transform_mask = transform.Compose(transform_mask_)
    self.files_A = sorted(glob.glob(root + "/*.*"))
    self.total_len = len(self.files_A)
    self.batch_size = None
    self.shuffle = False
    self.drop_last = False
    self.num_workers = None
    self.buffer_size = 512 * 1024 * 1024
    self.lmdir = lmdir
    self.maskdir = maskdir
    self.cmaskdir = cmaskdir
    self.load_h = load_h
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--num_classes', type=int, default=130)
    parser.add_argument('--lr', type=float, default=2e-3)
    parser.add_argument('--weight_decay', type=float, default=1e-5)
    # store_true avoids the argparse pitfall where type=bool parses any
    # non-empty string (including "False") as True
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--dataroot', type=str, default='/content/drive/MyDrive/dogflg/data2/')
    parser.add_argument('--model_path', type=str, default='./best_model.bin')
    parser.add_argument('--sampleratio', type=float, default=0.8)
    args = parser.parse_args()

    transform_train = transform.Compose([
        transform.Resize((256, 256)),
        transform.CenterCrop(224),
        transform.RandomHorizontalFlip(),
        transform.ToTensor(),
        transform.ImageNormalize(0.485, 0.229),  # scalar stats applied to all channels
        # transform.ImageNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    root_dir = args.dataroot
    train_loader = TsinghuaDog(root_dir, batch_size=args.batch_size, train=True, part='train', shuffle=True,
                               transform=transform_train, sample_rate=args.sampleratio)

    transform_test = transform.Compose([
        transform.Resize((256, 256)),
        transform.CenterCrop(224),
        transform.ToTensor(),
        transform.ImageNormalize(0.485, 0.229),
        # transform.ImageNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_loader = TsinghuaDog(root_dir, batch_size=args.batch_size, train=False, part='val', shuffle=False,
                             transform=transform_test, sample_rate=args.sampleratio)
    epochs = args.epochs
    model = Net(num_classes=args.num_classes)
    lr = args.lr
    weight_decay = args.weight_decay
    # pass the parsed weight decay through; it was previously read but unused
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.99, weight_decay=weight_decay)
    if args.resume:
        model.load(args.model_path)
        print('model loaded', args.model_path)
    # random save for test
    # model.save(args.model_path)
    if args.eval:
        evaluate(model, val_loader, save_path=args.model_path)
        return
    for epoch in range(epochs):
        train(model, train_loader, optimizer, epoch)
        evaluate(model, val_loader, epoch, save_path=args.model_path)
def test_not_pil_image(self):
    img = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.RandomAffine(20),
        transform.ToTensor(),
    ])(img)
    img = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.Gray(),
        transform.Resize(20),
        transform.ToTensor(),
    ])(img)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--num_classes', type=int, default=130)
    parser.add_argument('--lr', type=float, default=2e-3)
    parser.add_argument('--weight_decay', type=float, default=1e-5)
    # store_true avoids the argparse pitfall where type=bool parses "False" as True
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--dataroot', type=str, default='/home/gmh/dataset/TsinghuaDog/')
    parser.add_argument('--model_path', type=str, default='./best_model.pkl')
    args = parser.parse_args()

    transform_train = transform.Compose([
        transform.Resize((512, 512)),
        transform.RandomCrop(448),
        transform.RandomHorizontalFlip(),
        transform.ToTensor(),
        transform.ImageNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    root_dir = args.dataroot
    # use the parsed batch size instead of the previously hard-coded 16
    train_loader = TsinghuaDog(root_dir, batch_size=args.batch_size, train=True, part='train', shuffle=True,
                               transform=transform_train)

    transform_test = transform.Compose([
        transform.Resize((512, 512)),
        transform.CenterCrop(448),
        transform.ToTensor(),
        transform.ImageNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_loader = TsinghuaDog(root_dir, batch_size=args.batch_size, train=False, part='val', shuffle=False,
                             transform=transform_test)
    epochs = args.epochs
    model = Net(num_classes=args.num_classes)
    lr = args.lr
    weight_decay = args.weight_decay
    # pass the parsed weight decay through; it was previously read but unused
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    if args.resume:
        model.load(args.model_path)
    if args.eval:
        evaluate(model, val_loader)
        return
    for epoch in range(epochs):
        train(model, train_loader, optimizer, epoch)
        evaluate(model, val_loader, epoch)
def test_crop(self):
    height = random.randint(10, 32) * 2
    width = random.randint(10, 32) * 2
    oheight = random.randint(5, (height - 2) // 2) * 2  # //: randint needs int bounds
    owidth = random.randint(5, (width - 2) // 2) * 2
    img = np.ones([height, width, 3])
    oh1 = (height - oheight) // 2
    ow1 = (width - owidth) // 2
    # imgnarrow = img[oh1:oh1 + oheight, ow1:ow1 + owidth, :]
    # imgnarrow.fill(0)
    img[oh1:oh1 + oheight, ow1:ow1 + owidth, :] = 0
    # img = jt.array(img)
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(
        result.sum(), 0,
        f"height: {height} width: {width} oheight: {oheight} owidth: {owidth}"
    )
    oheight += 1
    owidth += 1
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    sum1 = result.sum()
    # TODO: not pass
    # self.assertGreater(sum1, 1,
    #     f"height: {height} width: {width} oheight: {oheight} owidth: {owidth}")
    oheight += 1
    owidth += 1
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    sum2 = result.sum()
    self.assertGreater(
        sum2, 0,
        f"height: {height} width: {width} oheight: {oheight} owidth: {owidth}"
    )
    self.assertGreaterEqual(
        sum2, sum1,
        f"height: {height} width: {width} oheight: {oheight} owidth: {owidth}"
    )
def build_transform(self):
    """
    Creates a basic transformation that was used to train the models
    """
    cfg = self.cfg
    # images are loaded with OpenCV, so they are already in BGR order;
    # all we need to do is scale by 255 if we want the BGR255 format, or
    # flip the channels if we want RGB in the [0, 1] range.
    if cfg.INPUT.TO_BGR255:
        to_bgr_transform = T.Lambda(lambda x: x * 255)
    else:
        to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

    normalize_transform = T.ImageNormalize(
        mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
    )
    min_size = cfg.INPUT.MIN_SIZE_TEST
    max_size = cfg.INPUT.MAX_SIZE_TEST
    transform = T.Compose([
        T.ToPILImage(),
        Resize(min_size, max_size),
        T.ToTensor(),
        to_bgr_transform,
        normalize_transform,
    ])
    return transform
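# A minimal sketch (not from the original source) of what the two Lambda
# branches above do, assuming a CHW array whose channels are BGR in [0, 1]
# after ToTensor:
import numpy as np

x = np.random.rand(3, 4, 4)          # stand-in BGR image, values in [0, 1]
bgr255 = x * 255                     # TO_BGR255 branch: BGR in [0, 255]
rgb01 = x[[2, 1, 0]]                 # other branch: reorder BGR -> RGB, keep [0, 1]
assert np.allclose(rgb01[0], x[2])   # the new red plane is the old blue plane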
def main():
    best_acc1 = 0
    # TODO: multi gpu
    if args.model == 'TNet26':
        model = TNet.Resnet26()
    elif args.model == 'TNet38':
        model = TNet.Resnet38()
    elif args.model == 'TNet50':
        model = TNet.Resnet50()
    elif args.model == 'Resnet26':
        model = jtmodels.__dict__['resnet26']()
    elif args.model == 'Resnet38':
        model = jtmodels.__dict__['resnet38']()
    elif args.model == 'Resnet50':
        model = jtmodels.__dict__['resnet50']()
    elif args.model == 'SAN10':
        model = san(sa_type=0, layers=[2, 1, 2, 4, 1], kernels=[3, 7, 7, 7, 7], num_classes=1000)
    elif args.model == 'SAN_TCN10':
        model = san_tcn(sa_type=0, layers=[2, 1, 2, 4, 1], kernels=[3, 7, 7, 7, 7], num_classes=1000)
    else:
        print("Model not found!")
        exit(0)
    if args.use_pytorch_conv_init:
        pytorch_conv_init(model)
    model_path = os.path.join(args.save_path, 'model_best.pk')
    model.load(model_path)
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    # val_transform = transform.Compose([transform.Resize(256), transform.CenterCrop(224), transform.ImageNormalize(mean, std)])
    # note: this transform differs from the usual validation pipeline above (no CenterCrop)
    val_transform = transform.Compose([transform.Resize(224), transform.ImageNormalize(mean, std)])
    val_loader = ImageFolder('input_images', val_transform).set_attrs(
        batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers)
    test(val_loader, model)
def get_transform(params, gray=False, mask=False):
    transform_ = []
    # resize
    transform_.append(
        transform.Resize((params['load_h'], params['load_w']), Image.BICUBIC))
    # flip
    if params['flip']:
        transform_.append(transform.Lambda(lambda img: transform.hflip(img)))
    if gray:
        transform_.append(transform.Gray())
    if mask:
        transform_.append(transform.ImageNormalize([0.], [1.]))
    else:
        if not gray:
            transform_.append(
                transform.ImageNormalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]))
        else:
            transform_.append(transform.ImageNormalize([0.5], [0.5]))
    return transform.Compose(transform_)
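# Hedged usage sketch (not part of the original source): get_transform reads
# 'load_h', 'load_w' and 'flip' from params; the values here are illustrative.
params = {'load_h': 512, 'load_w': 512, 'flip': False}
img_transform = get_transform(params)                         # RGB, normalized to [-1, 1]
mask_transform = get_transform(params, gray=True, mask=True)  # grayscale, left in [0, 1]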
def test_RandomAffine(self):
    img = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomAffine(20),
        transform.ToTensor(),
    ])(img)
def get_loader(root_dir, label_file, batch_size, img_size=0, num_thread=4, pin=True, test=False, split='train'):
    if test is False:
        raise NotImplementedError
    else:
        transform = transforms.Compose([
            transforms.Resize((400, 400)),
            transforms.ToTensor(),
            transforms.ImageNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        dataset = SemanLineDatasetTest(root_dir, label_file, transform=transform, t_transform=None)
    if test is False:
        raise NotImplementedError
    else:
        dataset.set_attrs(batch_size=batch_size, shuffle=False)
    print('Get dataset success.')
    return dataset
def transforms_imagenet_train(
        img_size=224,
        scale=None,
        ratio=None,
        hflip=0.5,
        vflip=0.,
        interpolation='random',
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
):
    """
    Builds the standard ImageNet training pipeline: a random resized crop
    with the chosen interpolation, optional horizontal/vertical flips, then
    tensor conversion and normalization.
    """
    scale = tuple(scale or (0.08, 1.0))  # default imagenet scale range
    ratio = tuple(ratio or (3. / 4., 4. / 3.))  # default imagenet ratio range

    primary_tfl = [
        RandomResizedCropAndInterpolation(img_size, scale=scale, ratio=ratio, interpolation=interpolation)
    ]
    if hflip > 0.:
        primary_tfl += [transforms.RandomHorizontalFlip(p=hflip)]
    if vflip > 0.:
        primary_tfl += [transforms.RandomVerticalFlip(p=vflip)]

    final_tfl = [
        transforms.ToTensor(),
        transforms.ImageNormalize(mean=mean, std=std)
    ]
    return transforms.Compose(primary_tfl + final_tfl)
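# Hedged usage sketch (not part of the original source): build the default
# ImageNet training pipeline and apply it to a PIL image `pil_img`, which is
# assumed to exist.
train_tf = transforms_imagenet_train(img_size=224, hflip=0.5)
# out = train_tf(pil_img)   # -> normalized CHW tensor, 224x224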
def test_TenCrop(self):
    img = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.TenCrop(20),
        transform.ToTensor(),
    ])(img)
def test_RandomPerspective(self):
    img = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomPerspective(p=1),
        transform.ToTensor(),
    ])(img)
def test_imagenet(self):
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=False)
    random.seed(0)
    tc_data = []
    for i, data in enumerate(train_loader):
        tc_data.append(data)
        print("get", data[0].shape)
        if i == check_num_batch:
            break
    from jittor.dataset.dataset import ImageFolder
    import jittor.transform as transform
    dataset = ImageFolder(traindir).set_attrs(batch_size=256, shuffle=False)
    dataset.set_attrs(transform=transform.Compose([
        transform.RandomCropAndResize(224),
        transform.RandomHorizontalFlip(),
        transform.ImageNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]))
    random.seed(0)
    for i, (images, labels) in enumerate(dataset):
        print("compare", i)
        assert np.allclose(images.numpy(), tc_data[i][0].numpy())
        assert np.allclose(labels.numpy(), tc_data[i][1].numpy())
        if i == check_num_batch:
            break
def __init__(self, root, transforms_=None, img_size=128, mask_size=64, mode="train"):
    super().__init__()
    self.transform = transform.Compose(transforms_)
    self.img_size = img_size
    self.mask_size = mask_size
    self.mode = mode
    self.files = sorted(glob.glob("%s/*.jpg" % root))
    self.files = self.files[:-4000] if mode == "train" else self.files[-4000:]
    self.set_attrs(total_len=len(self.files))
def get_dataset():
    dataset = ImageFolder(traindir).set_attrs(batch_size=256, shuffle=False)
    dataset.set_attrs(transform=transform.Compose([
        transform.Resize(224),
        transform.ImageNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]), num_workers=0)
    return dataset
def test_resize(self):
    height = random.randint(24, 32) * 2
    width = random.randint(24, 32) * 2
    osize = random.randint(5, 12) * 2

    img = jt.ones([height, width, 3])
    result = transform.Compose([
        transform.ToPILImage(),
        transform.Resize(osize),
        transform.ToTensor(),
    ])(img)
    self.assertIn(osize, result.shape)
    if height < width:
        self.assertLessEqual(result.shape[1], result.shape[2])
    elif width < height:
        self.assertGreaterEqual(result.shape[1], result.shape[2])

    result = transform.Compose([
        transform.ToPILImage(),
        transform.Resize([osize, osize]),
        transform.ToTensor(),
    ])(img)
    self.assertIn(osize, result.shape)
    self.assertEqual(result.shape[1], osize)
    self.assertEqual(result.shape[2], osize)

    oheight = random.randint(5, 12) * 2
    owidth = random.randint(5, 12) * 2
    result = transform.Compose([
        transform.ToPILImage(),
        transform.Resize((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)

    result = transform.Compose([
        transform.ToPILImage(),
        transform.Resize([oheight, owidth]),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)
def __init__(self, root, hr_shape):
    hr_height, hr_width = hr_shape
    # transforms for low resolution images and high resolution images
    self.lr_transform = transform.Compose([
        transform.Resize((hr_height // 4, hr_height // 4), Image.BICUBIC),
        transform.ImageNormalize(mean, std),
    ])
    self.hr_transform = transform.Compose([
        transform.Resize((hr_height, hr_height), Image.BICUBIC),
        transform.ImageNormalize(mean, std),
    ])
    self.files = sorted(glob.glob(root + "/*.*"))
    self.total_len = len(self.files)
    self.batch_size = None
    self.shuffle = False
    self.drop_last = False
    self.num_workers = None
    self.buffer_size = 512 * 1024 * 1024
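# Worked size example (illustrative values, not from the source): both Resize
# calls above use hr_height, so LR/HR pairs are square even if hr_width
# differs; hr_shape = (256, 256) yields 64x64 LR and 256x256 HR images.
hr_height = 256
assert (hr_height // 4, hr_height // 4) == (64, 64)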
def __init__(self, root, transform_=None, mode="train", attributes=None):
    super().__init__()
    self.transform = transform.Compose(transform_)
    self.selected_attrs = attributes
    self.files = sorted(glob.glob("%s/images/*.jpg" % root))
    self.files = self.files[:-2000] if mode == "train" else self.files[-2000:]
    self.label_path = glob.glob("%s/*.txt" % root)[0]
    self.annotations = self.get_annotations()
    self.set_attrs(total_len=len(self.files))
def __init__(self, root, input_shape, mode="train"):
    self.transform = transform.Compose([
        transform.Resize(input_shape[-2:]),
        transform.ImageNormalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])
    self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.*"))
    self.total_len = len(self.files)
    self.batch_size = None
    self.shuffle = False
    self.drop_last = False
    self.num_workers = None
    self.buffer_size = 512 * 1024 * 1024
def __init__(self, root, transforms_=None, unaligned=False, mode="train"):
    self.transform = transform.Compose(transforms_)
    self.unaligned = unaligned
    self.files_A = sorted(glob.glob(os.path.join(root, "%s/A" % mode) + "/*.*"))
    self.files_B = sorted(glob.glob(os.path.join(root, "%s/B" % mode) + "/*.*"))
    self.total_len = max(len(self.files_A), len(self.files_B))
    self.batch_size = None
    self.shuffle = False
    self.drop_last = False
    self.num_workers = None
    self.buffer_size = 512 * 1024 * 1024
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--num_classes', type=int, default=130)
    parser.add_argument('--lr', type=float, default=2e-3)
    parser.add_argument('--weight_decay', type=float, default=1e-5)
    parser.add_argument('--resume', type=bool, default=True)
    parser.add_argument('--eval', type=bool, default=False)
    parser.add_argument('--dataroot', type=str, default='/content/drive/MyDrive/dogfl/data/TEST_A/')
    parser.add_argument('--model_path', type=str, default='./best_model.bin')
    parser.add_argument('--out_file', type=str, default='./result.json')
    args = parser.parse_args()

    root_dir = args.dataroot
    transform_test = transform.Compose([
        transform.Resize((512, 512)),
        transform.CenterCrop(448),
        transform.ToTensor(),
        transform.ImageNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    name_list = []
    for _, _, _name_list in os.walk(root_dir):
        name_list = _name_list
    val_loader = TsinghuaDogExam(root_dir, batch_size=args.batch_size, train=False,
                                 name_list=name_list, shuffle=False, transform=transform_test)
    model = Net(num_classes=args.num_classes)
    if args.resume:
        model.load(args.model_path)
        print('model loaded', args.model_path)

    # dataset labels start from 1, while the model's predictions do not
    top5_class_list = evaluate(model, val_loader)
    pred_result = dict(zip(name_list, top5_class_list))
    with open(args.out_file, 'w') as fout:
        json.dump(pred_result, fout, ensure_ascii=False, indent=4)
def get_transform(new_size=None):
    """
    obtain the image transforms required for the input data
    :param new_size: size of the resized images
    :return: image_transform => composed transform object
    """
    # from torchvision.transforms import ToTensor, Normalize, Compose, Resize, RandomHorizontalFlip
    if new_size is not None:
        image_transform = transform.Compose([
            transform.RandomHorizontalFlip(),
            transform.Resize(new_size),
            transform.ToTensor(),
            transform.ImageNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
    else:
        image_transform = transform.Compose([
            transform.RandomHorizontalFlip(),
            transform.ToTensor(),
            transform.ImageNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
    return image_transform
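# Hedged usage sketch (not part of the original source):
resize_tf = get_transform(new_size=(128, 128))  # resize branch
keep_tf = get_transform()                       # keep the original resolution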
def im_detect_bbox(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the original image.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(images)
def test_random_crop(self):
    height = random.randint(10, 32) * 2
    width = random.randint(10, 32) * 2
    oheight = random.randint(5, (height - 2) // 2) * 2  # //: randint needs int bounds
    owidth = random.randint(5, (width - 2) // 2) * 2
    img = np.ones((height, width, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)

    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)

    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((height, width)),
        transform.ToTensor()
    ])(img)
    self.assertEqual(result.shape[1], height)
    self.assertEqual(result.shape[2], width)
    self.assertTrue(np.allclose(img, result.transpose(1, 2, 0)))

    with self.assertRaises(AssertionError):
        result = transform.Compose([
            transform.ToPILImage(),
            transform.RandomCrop((height + 1, width + 1)),
            transform.ToTensor(),
        ])(img)
def build_transform():
    if cfg.INPUT.TO_BGR255:
        to_bgr_transform = T.Lambda(lambda x: x * 255)
    else:
        to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])
    normalize_transform = T.Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD)
    min_size = cfg.INPUT.MIN_SIZE_TEST
    max_size = cfg.INPUT.MAX_SIZE_TEST
    transform = T.Compose([
        T.ToPILImage(),
        Resize(min_size, max_size),
        T.ToTensor(),
        to_bgr_transform,
        normalize_transform,
    ])
    return transform
def im_detect_bbox_hflip(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    boxlists = model(images)

    # Invert the detections computed on the flipped image
    boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists]
    return boxlists_inv
def transforms_imagenet_eval(img_size=224, crop_pct=0.9, interpolation=Image.BICUBIC,
                             mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    crop_pct = crop_pct or 0.875
    if isinstance(img_size, tuple):
        assert len(img_size) == 2
        if img_size[-1] == img_size[-2]:
            # fall back to older behaviour so Resize scales to the shortest
            # edge if the target is square
            scale_size = int(math.floor(img_size[0] / crop_pct))
        else:
            scale_size = tuple([int(x / crop_pct) for x in img_size])
    else:
        scale_size = int(math.floor(img_size / crop_pct))

    return transforms.Compose([
        Resize(scale_size, interpolation),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.ImageNormalize(mean=mean, std=std)
    ])
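# Worked example (using the defaults above, not from the source): with
# img_size=224 and crop_pct=0.9, the image is first resized so its shorter
# edge is int(math.floor(224 / 0.9)) == 248 pixels, then center-cropped to 224x224.
import math
assert int(math.floor(224 / 0.9)) == 248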
def test_dataset():
    root = '/home/gmh/dataset/TsinghuaDog'
    part = 'train'
    # from torchvision import transforms
    rgb_mean = [0.5, 0.5, 0.5]
    rgb_std = [0.5, 0.5, 0.5]
    transform_val = transform.Compose([
        transform.Resize((299, 299)),
        transform.ToTensor(),
        transform.ImageNormalize(rgb_mean, rgb_std),
    ])
    dataloader = TsinghuaDog(root, batch_size=16, train=False, part=part, shuffle=True, transform=transform_val)
    # def __init__(self, root_dir, batch_size, part='train', train=True, shuffle=False, transform=None, num_workers=1):
    for images, labels in dataloader:
        # print(images.size(), labels.size(), labels)
        pass
# Loss function: mean squared error.
# Usage: adversarial_loss(network output A, class label B)
# Result: (A - B)^2
adversarial_loss = nn.MSELoss()

generator = Generator()
discriminator = Discriminator()

# Load the MNIST dataset
from jittor.dataset.mnist import MNIST
import jittor.transform as transform

transform = transform.Compose([
    transform.Resize(opt.img_size),
    transform.Gray(),
    transform.ImageNormalize(mean=[0.5], std=[0.5]),
])
dataloader = MNIST(train=True, transform=transform).set_attrs(batch_size=opt.batch_size, shuffle=True)

optimizer_G = nn.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = nn.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

from PIL import Image
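# A minimal sketch (not from the original script) of one LSGAN-style generator
# update built from the pieces above; `opt.latent_dim` is assumed to be the
# generator's noise size, as in the common GAN example scripts.
def generator_step(imgs):
    valid = jt.ones((imgs.shape[0], 1))            # target label: "real"
    z = jt.randn((imgs.shape[0], opt.latent_dim))  # sample generator noise
    g_loss = adversarial_loss(discriminator(generator(z)), valid)
    optimizer_G.step(g_loss)                       # jittor: backward + update in one call
    return g_loss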