def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)

    # random color jittering
    rgb_np = color_jitter(rgb_np)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255

    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def val_transform(rgb, depth):
    depth_np = depth

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
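
# Minimal usage sketch (not from the original source): wrapping the two
# transforms above for a single RGB-D pair. Assumes `torch` and `numpy` are
# imported, `rgb` is an HxWx3 and `depth` an HxW numpy array, and that
# `oheight`, `owidth`, `color_jitter`, and the custom `transforms` module are
# defined at module level, as the functions above expect.
def to_tensor_pair(rgb, depth, train=True):
    if train:
        rgb_np, depth_np = train_transform(rgb, depth)
    else:
        rgb_np, depth_np = val_transform(rgb, depth)
    # channels-first layout for PyTorch (HxWxC -> CxHxW); ascontiguousarray
    # guards against the negative strides a horizontal flip can leave behind
    rgb_t = torch.from_numpy(np.ascontiguousarray(rgb_np.transpose((2, 0, 1)))).float()
    depth_t = torch.from_numpy(np.ascontiguousarray(depth_np)).unsqueeze(0).float()
    return rgb_t, depth_t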
def train_cocodet_preprocess_factory(
        *,
        square_edge,
        augmentation=True,
        extended_scale=False,
        orientation_invariant=0.0,
        rescale_images=1.0,
):
    if not augmentation:
        return transforms.Compose([
            transforms.NormalizeAnnotations(),
            transforms.RescaleAbsolute(square_edge),
            transforms.CenterPad(square_edge),
            transforms.EVAL_TRANSFORM,
        ])

    if extended_scale:
        rescale_t = transforms.RescaleRelative(
            scale_range=(0.5 * rescale_images, 2.0 * rescale_images),
            power_law=True, stretch_range=(0.75, 1.33))
    else:
        rescale_t = transforms.RescaleRelative(
            scale_range=(0.7 * rescale_images, 1.5 * rescale_images),
            power_law=True, stretch_range=(0.75, 1.33))

    orientation_t = None
    if orientation_invariant:
        orientation_t = transforms.RandomApply(
            transforms.RotateBy90(), orientation_invariant)

    return transforms.Compose([
        transforms.NormalizeAnnotations(),
        transforms.AnnotationJitter(),
        transforms.RandomApply(transforms.HFlip(COCO_KEYPOINTS, HFLIP), 0.5),
        rescale_t,
        transforms.Crop(square_edge, use_area_of_interest=False),
        transforms.CenterPad(square_edge),
        orientation_t,  # may be None; the project's Compose is assumed to skip None entries
        transforms.MinSize(min_side=4.0),
        transforms.UnclippedArea(),
        transforms.UnclippedSides(),
        transforms.TRAIN_TRANSFORM,
    ])
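
# Usage sketch (assumption, not from the original source): building both an
# augmented training pipeline and the deterministic evaluation fallback from
# the factory above. square_edge=513 is an illustrative value; passing
# orientation_invariant=0.1 applies RotateBy90 to roughly 10% of images.
train_preprocess = train_cocodet_preprocess_factory(
    square_edge=513,
    orientation_invariant=0.1,
)
eval_preprocess = train_cocodet_preprocess_factory(
    square_edge=513,
    augmentation=False,
)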
def __init__(self, scale_min=0.5, scale_max=1.75, rotate_min=-1, rotate_max=1,
             train_h=512, train_w=1024, ignore_label=255,
             mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    train_transform = T.Compose([
        T.RandScale([scale_min, scale_max]),
        T.RandRotate([rotate_min, rotate_max], padding=mean, ignore_label=ignore_label),
        T.RandomGaussianBlur(),
        T.RandomHorizontalFlip(),
        T.Crop([train_h, train_w], crop_type='rand', padding=mean, ignore_label=ignore_label),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ])
    self.train_transform = train_transform  # assumed attribute name; the excerpt does not show how it is consumed
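
# Usage sketch (hypothetical; the enclosing class is not named in this
# excerpt, `SegTransformBuilder` is a stand-in). Note that main() below
# multiplies mean/std by 255 before passing them in, because the rotate/crop
# padding is applied to 0-255 images; the unscaled defaults here would pad
# with near-black pixels.
value_scale = 255
mean_255 = [m * value_scale for m in (0.485, 0.456, 0.406)]
std_255 = [s * value_scale for s in (0.229, 0.224, 0.225)]
builder = SegTransformBuilder(mean=mean_255, std=std_255)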
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # dataset, num_classes = get_dataset(args.data_path, args.dataset, "train", get_transform(train=True))
    # dataset_test, _ = get_dataset(args.data_path, args.dataset, "val", get_transform(train=False))
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    scale_min = 0.5
    scale_max = 1.75
    rotate_min = -1
    rotate_max = 1
    train_h = 512
    train_w = 1024
    ignore_label = 255
    train_transform = T.Compose([
        T.RandScale([scale_min, scale_max]),
        T.RandRotate([rotate_min, rotate_max], padding=mean, ignore_label=ignore_label),
        T.RandomGaussianBlur(),
        T.RandomHorizontalFlip(),
        T.Crop([train_h, train_w], crop_type='rand', padding=mean, ignore_label=ignore_label),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ])
    dataset_train = dataset.CityscapesData(
        split='train', data_root=args.data_root,
        data_list=args.train_list, transform=train_transform)
    val_transform = T.Compose([
        T.Crop([train_h, train_w], crop_type='center', padding=mean, ignore_label=ignore_label),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ])
    dataset_test = dataset.CityscapesData(
        split='val', data_root=args.data_root,
        data_list=args.val_list, transform=val_transform)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset_train)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    data_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn, drop_last=True)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    num_classes = 19
    # model = torchvision.models.segmentation.__dict__[args.model](num_classes=num_classes,
    #                                                              aux_loss=args.aux_loss,
    #                                                              pretrained=args.pretrained)
    if args.pretrained:
        supernet = OFAMobileNetV3(
            n_classes=1000,
            dropout_rate=0,
            width_mult_list=1.2,
            ks_list=[3, 5, 7],
            expand_ratio_list=[3, 4, 6],
            depth_list=[2, 3, 4],
        )
        arch = OFAArchitecture.from_legency_string(args.arch)
        supernet.set_active_subnet(ks=arch.ks, e=arch.ratios, d=arch.depths)
        model = supernet.get_active_subnet()
        s = torch.load("model_best.pth.tar", map_location="cpu")
        model.load_state_dict(s["state_dict_ema"])
        model = convert2segmentation(model=model, begin_index=17)
        print("load pretrained model.")
    else:
        supernet = SPOSMobileNetV3Segmentation(width_mult=1.2)
        model = supernet.get_subnet(OFAArchitecture.from_legency_string(args.arch))
    model.to(device)
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.pretrained:
        params_to_optimize = [
            {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},
            {"params": [p for p in model_without_ddp.stem.parameters() if p.requires_grad]},
        ]
        if args.aux_loss:
            params = [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]
            params_to_optimize.append({"params": params, "lr": args.lr * 10})
        optimizer = torch.optim.SGD(
            params_to_optimize,
            lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    else:
        params_to_optimize = [
            {"params": [p for p in model_without_ddp.first_conv.parameters() if p.requires_grad]},
            {"params": [p for p in model_without_ddp.blocks.parameters() if p.requires_grad]},
            {"params": [p for p in model_without_ddp.remain_block.parameters() if p.requires_grad]},
            {"params": [p for p in model_without_ddp.head.parameters() if p.requires_grad]},
        ]
        if args.aux_loss:
            params = [p for p in model_without_ddp.aux_head.parameters() if p.requires_grad]
            params_to_optimize.append({"params": params, "lr": args.lr})
        optimizer = torch.optim.SGD(
            params_to_optimize,
            lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'], strict=not args.test_only)
        if not args.test_only:
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
        print(confmat)
        return

    # `criterion` was not defined anywhere in this snippet; a standard choice
    # for Cityscapes (assumption, not from the original source):
    criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_label)

    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq)
        confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
        print(confmat)
        utils.save_on_master(
            {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args
            },
            os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
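
# Side note as code (a sketch of what the LambdaLR above computes): the
# lambda implements the common "poly" learning-rate policy, stepped once
# per iteration over the whole run.
def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    return base_lr * (1 - cur_iter / max_iter) ** power

# For example, with base_lr=0.01 and max_iter=10000:
#   poly_lr(0.01, 0, 10000)    == 0.01
#   poly_lr(0.01, 5000, 10000) ~= 0.00536
#   poly_lr(0.01, 9999, 10000) ~= 2.5e-06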
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # dataset, num_classes = get_dataset(args.data_path, args.dataset, "train", get_transform(train=True))
    # dataset_test, _ = get_dataset(args.data_path, args.dataset, "val", get_transform(train=False))
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    scale_min = 0.5
    scale_max = 1.75
    rotate_min = -1
    rotate_max = 1
    train_h = 512
    train_w = 1024
    ignore_label = 255
    train_transform = T.Compose([
        T.RandScale([scale_min, scale_max]),
        T.RandRotate([rotate_min, rotate_max], padding=mean, ignore_label=ignore_label),
        T.RandomGaussianBlur(),
        T.RandomHorizontalFlip(),
        T.Crop([train_h, train_w], crop_type='rand', padding=mean, ignore_label=ignore_label),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ])
    dataset_train = dataset.CityscapesData(
        split='train', data_root=args.data_root,
        data_list=args.train_list, transform=train_transform)
    val_transform = T.Compose([
        T.Crop([train_h, train_w], crop_type='center', padding=mean, ignore_label=ignore_label),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ])
    dataset_test = dataset.CityscapesData(
        split='val', data_root=args.data_root,
        data_list=args.val_list, transform=val_transform)

    writer = SummaryWriter("vis")

    def _add_prefix(s, prefix, joiner='/'):
        return joiner.join([prefix, s])

    def visualize_image(images, global_step, num_row=3, image_set="TRAIN", name="IMAGE"):
        grid_image = make_grid(images[:num_row].clone().cpu().data, num_row, normalize=True)
        writer.add_image(
            _add_prefix(name.capitalize(), image_set.capitalize()),
            grid_image, global_step)

    img, tgt = dataset_test[0]
    print(img.shape)
    visualize_image(img, 0)
    writer.close()
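
# Standalone smoke-test sketch (assumption, not from the original source):
# the same grid logging as visualize_image() above, but without the closures
# inside main(). Assumes dataset samples are (CxHxW tensor, label) pairs, as
# the ToTensor/Normalize pipeline produces.
def log_sample_grid(dataset, logdir="vis", n=3, tag="Val/Image"):
    writer = SummaryWriter(logdir)
    samples = torch.stack([dataset[i][0] for i in range(n)], 0)
    grid_image = make_grid(samples, n, normalize=True)
    writer.add_image(tag, grid_image, global_step=0)
    writer.close()

# Inspect the logged grids with:  tensorboard --logdir vis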
    # target = dict()
    # for k, v in t.items():
    #     if v is None:
    #         target[k] = v
    #     else:
    #         target[k] = torch.tensor(v)
    # targets_ += (target, )
    return torch.stack(images, 0), targets


if __name__ == '__main__':
    data_dir = '/disk1/home/xiaj/res/face/maskface/zhihu_akou'

    # train_transform = transforms.Compose(IMG_DIM, RGB_MEAN)
    train_transform = transforms.Compose1([
        transforms.Crop(IMG_DIM),
        transforms.Distort(),
        # TODO: remove this RGB_MEAN later; mean subtraction should be
        # handled directly by transforms.Normalize
        transforms.Pad2Square(fill=RGB_MEAN),
        transforms.RandomMirror(p=0.5),
        transforms.Resize(min_size=IMG_DIM, max_size=IMG_DIM),
        transforms.ToTensor(),
    ])
    train_dataset = dataset.MaskFaceAkouDataset(data_dir, train_transform)
    epoch_size = math.ceil(len(train_dataset) / BATCH_SIZE)
    max_iter = MAX_EPOCH * epoch_size
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,