def get_transform(self, split):
    """Return the preprocessing pipeline for a dataset split.

    'train' gets full augmentation, 'valid' a fixed-aspect-ratio crop,
    'test' a direct resize to the crop size; any other split yields None.
    """
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    if split == 'train':
        return tr.Compose([
            tr.RandomHorizontalFlip(),
            tr.RandomScaleCrop(base_size=self.base_size,
                               crop_size=self.crop_size,
                               scales=(0.8, 2.0),
                               fill=constants.BG_INDEX),
            tr.RandomGaussianBlur(),
            tr.Normalize(mean=mean, std=std),
            tr.ToTensor(),
        ])
    if split == 'valid':
        # validation: crop with a fixed aspect ratio
        return tr.Compose([
            tr.FixScaleCrop(crop_size=self.crop_size),
            tr.Normalize(mean=mean, std=std),
            tr.ToTensor(),
        ])
    if split == 'test':
        # test: resize straight to the crop size
        return tr.Compose([
            tr.FixedResize(size=self.crop_size),
            tr.Normalize(mean=mean, std=std),
            tr.ToTensor(),
        ])
    return None
def make_clevr_transforms(image_set, cautious=False):
    """Build CLEVR transforms; `cautious` drops flipping and makes crops respect boxes."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [256, 288, 320, 352, 384]
    if image_set == "train":
        flip = [] if cautious else [T.RandomHorizontalFlip()]
        multi_scale = T.RandomSelect(
            T.RandomResize(scales, max_size=512),
            T.Compose([
                T.RandomResize([320, 352, 384]),
                T.RandomSizeCrop(256, 512, respect_boxes=cautious),
                T.RandomResize(scales, max_size=512),
            ]),
        )
        return T.Compose(flip + [multi_scale, normalize])
    if image_set == "val":
        # validation applies normalization only (resize intentionally disabled)
        return T.Compose([normalize])
    raise ValueError(f"unknown {image_set}")
def make_support_transforms():
    """
    Transforms for support images during the training phase.
    For transforms for support images during inference, please check dataset_support.py
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # 448, 464, ..., 672 in steps of 16
    scales = list(range(448, 673, 16))
    crop_branch = T.Compose([
        T.RandomResize([400, 500, 600]),
        T.RandomSizeCrop(384, 600),
        T.RandomResize(scales, max_size=672),
    ])
    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomColorJitter(p=0.25),
        T.RandomSelect(T.RandomResize(scales, max_size=672), crop_branch),
        normalize,
    ])
def make_yolo_transforms(image_set):
    """Augmentation pipelines for the YOLO-style dataset ('train' or 'val')."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    if image_set == 'val':
        return T.Compose([T.RandomResize([640]), normalize])
    if image_set == 'train':
        rotate_or_gray = T.RandomSelect(
            T.RandomRotation(180),
            T.RandomGrayscale(p=0.07),
        )
        crop_or_gray = T.RandomSelect(
            T.RandomSizeCrop(384, 600),
            T.RandomGrayscale(p=0.07),
        )
        return T.Compose([
            T.RandomShuffle(keep=0.7),
            rotate_or_gray,
            crop_or_gray,
            T.RandomResize([400, 450, 500, 550, 600, 650]),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def make_coco_transforms(image_set, cautious):
    """COCO transforms; `cautious` disables flipping and makes crops respect boxes."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    max_size = 1333
    if image_set == "train":
        ops = [] if cautious else [T.RandomHorizontalFlip()]
        ops.append(T.RandomSelect(
            T.RandomResize(scales, max_size=max_size),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, max_size, respect_boxes=cautious),
                T.RandomResize(scales, max_size=max_size),
            ]),
        ))
        ops.append(normalize)
        return T.Compose(ops)
    if image_set == "val":
        return T.Compose([
            T.RandomResize([800], max_size=max_size),
            normalize,
        ])
    raise ValueError(f"unknown {image_set}")
def make_coco_transforms(image_set):
    """Standard DETR COCO augmentation for 'train' or 'val'.

    T is the project's datasets/transforms.py module; its ops mirror
    torchvision.transforms, except that ToTensor() puts channels first and
    scales pixels to [0, 1], and Normalize() additionally converts target
    boxes to (cx, cy, w, h) normalized to [0, 1].
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    if image_set == 'train':
        resize_only = T.RandomResize(scales, max_size=1333)
        resize_crop_resize = T.Compose([
            T.RandomResize([400, 500, 600]),
            T.RandomSizeCrop(384, 600),
            T.RandomResize(scales, max_size=1333),
        ])
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(resize_only, resize_crop_resize),
            normalize,
        ])
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def make_coco_transforms(image_set):
    """COCO transforms with photometric distortion and a final downscale
    sized to fit GPU memory."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768]
    if image_set == 'train':
        crop_and_shrink = T.Compose([
            T.RandomResize([400, 500, 600]),
            T.RandomSizeCrop(384, 600),
            # To suit the GPU memory the scale might be different
            T.RandomResize([300], max_size=540),  # for r50
            # T.RandomResize([280], max_size=504),  # for r101
        ])
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomResize(scales, max_size=800),
            T.PhotometricDistort(),
            crop_and_shrink,
            normalize,
        ])
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def get(cls, args):
    """Construct the train, val and val_video datasets with their transforms."""
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_transform = transforms.Compose([
        arraytransforms.RandomResizedCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        transforms.Lambda(torch.cat),
    ])
    # val and val_video share the same deterministic pipeline
    eval_transform = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        transforms.Lambda(torch.cat),
    ])
    train_dataset = cls(args.data, 'train', args.train_file, args.cache,
                        transform=train_transform)
    val_dataset = cls(args.data, 'val', args.val_file, args.cache,
                      transform=eval_transform)
    valvideo_dataset = cls(args.data, 'val_video', args.val_file, args.cache,
                           transform=eval_transform)
    return train_dataset, val_dataset, valvideo_dataset
def make_coco_transforms(image_set):
    """DETR-style COCO transforms for 'train' or 'val'."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    pipelines = {
        'train': lambda: T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ]),
        'val': lambda: T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ]),
    }
    if image_set not in pipelines:
        raise ValueError(f'unknown {image_set}')
    return pipelines[image_set]()
def make_default_transforms():
    """Default eval-style pipeline: resize to 800 (max side 1333), then
    tensor conversion and ImageNet normalization."""
    to_tensor_and_normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    return T.Compose([
        T.RandomResize([800], max_size=1333),
        to_tensor_and_normalize,
    ])
def _set_transforms(self):
    """Decide transformations to data to be applied.

    Returns a dict mapping scan type -> composed transform, restricted to the
    scan types actually in use (self.input_scan / self.goal_scan).
    """
    def _ct_pipeline(pixel_mean):
        # Shared CT pipeline ('con'/'noncon'/'cta' differed only in pixel_mean);
        # returned as a factory so transforms are built lazily, as before.
        return lambda: [
            custom_transforms.ToFloat(),
            custom_transforms.ResampleTo(self.resample),
            custom_transforms.Normalize(input_bounds=(-500, 500),
                                        pixel_mean=pixel_mean),
            custom_transforms.EltListToBlockTensor(),
        ]

    transforms_list_by_scan_type = {
        'con': _ct_pipeline(0.379),
        'noncon': _ct_pipeline(0.379),
        'cta': _ct_pipeline(0.271),
        'ctp': lambda: [
            custom_transforms.ToFloat(),
            # fixed resampling rate since CTPs are always 7-10,
            # and don't need an extra argument
            custom_transforms.ResampleTo((8, 256, 256)),
            custom_transforms.Normalize(input_bounds=(0, 60),
                                        pixel_mean=0.0158),
            custom_transforms.EltListToBlockTensor(),
        ],
    }
    transform_dict = dict(
        (scan, transforms.Compose(transform_list_f()))
        for scan, transform_list_f in transforms_list_by_scan_type.items()
        if scan in (self.input_scan, self.goal_scan))
    return transform_dict
def initialise(args):
    """Build the DETR model (with an 81-way head), criterion, box postprocessor
    and the HICO-DET dataset for args.partition.

    Raises ValueError for an unknown partition (previously `transforms` was
    left unbound and the function crashed later with NameError).
    """
    # Load model and loss function
    detr, criterion, postprocessors = build_model(args)
    class_embed = torch.nn.Linear(256, 81, bias=True)
    if os.path.exists(args.pretrained):
        print(f"Load pre-trained model from {args.pretrained}")
        detr.load_state_dict(torch.load(args.pretrained)['model_state_dict'])
    w, b = detr.class_embed.state_dict().values()
    # rows of the pretrained classification head kept for the 81-way head
    keep = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
        43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84,
        85, 86, 87, 88, 89, 90, 91
    ]
    class_embed.load_state_dict(dict(weight=w[keep], bias=b[keep]))
    detr.class_embed = class_embed
    if os.path.exists(args.resume):
        print(f"Resume from model at {args.resume}")
        detr.load_state_dict(torch.load(args.resume)['model_state_dict'])

    # Prepare dataset transforms
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    if args.partition == 'train2015':
        transforms = T.Compose([
            T.RandomHorizontalFlip(),
            T.ColorJitter(.4, .4, .4),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])
    elif args.partition == 'test2015':
        transforms = T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    else:
        # fail fast instead of hitting NameError on `transforms` below
        raise ValueError(f"unknown partition {args.partition}")

    # Load dataset
    dataset = HICODetObject(
        pocket.data.HICODet(
            root=os.path.join(args.data_root,
                              f'hico_20160224_det/images/{args.partition}'),
            anno_file=os.path.join(args.data_root,
                                   f'instances_{args.partition}.json'),
            target_transform=pocket.ops.ToTensor(input_format='dict')),
        transforms)
    return detr, criterion, postprocessors['bbox'], dataset
def get_image(args):
    """Load the demo image and wrap it in a NestedTensor batch of one."""
    raw = Image.open(args.demo_image).convert('RGB')
    transform = T.Compose([
        # T.RandomResize([400], max_size=1333),  # resizing disabled
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # the transform returns (image, target); only the image is needed here
    transformed = transform(raw, target=None)
    batch = [transformed[0]]
    return nested_tensor_from_tensor_list(batch)
def make_voc_transforms(image_set, args):
    """Pascal VOC transforms for 'train' / 'val'; the 'vit' backbone uses a
    fixed 384x384 resize.

    Raises ValueError for any other image_set (previously fell through and
    returned None, deferring the failure to the caller).
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if image_set == 'train':
        if args.backbone == 'vit':
            return T.Compose([
                T.RandomHorizontalFlip(),
                T.RandomResize([(384, 384)], max_size=384),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ])
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize([400, 500, 600], max_size=1000),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomCrop((384, 384)),
                    T.RandomResize([400, 500, 600], max_size=1000),
                ])),
            normalize,
        ])
    if image_set == 'val':
        if args.backbone == 'vit':
            # NOTE(review): val keeps RandomHorizontalFlip for the vit backbone,
            # mirroring the original code — confirm this is intended.
            return T.Compose([
                T.RandomHorizontalFlip(),
                T.RandomResize([(384, 384)], max_size=384),
                T.ToTensor(),
                T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ])
        return T.Compose([
            T.RandomResize([600], max_size=1000),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def make_coco_transforms(image_set):
    """coco dataset preprocessing

    Parameters
    ----------
    image_set : {str-like, scalar}
        "train" or "val"

    Returns
    -------
    result : {T.Compose list}
        the image transform operation list
    """
    # per-channel (R, G, B) mean and std used for normalization
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    # pick the augmentation pipeline for the requested split
    if image_set == 'train':
        multi_scale = T.RandomSelect(
            T.RandomResize(scales, max_size=1333),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                T.RandomResize(scales, max_size=1333),
            ]))
        return T.Compose([T.RandomHorizontalFlip(), multi_scale, normalize])
    if image_set == 'val':
        return T.Compose([T.RandomResize([800], max_size=1333), normalize])
    raise ValueError(f'unknown {image_set}')
def make_mot_transforms(image_set, args):
    """MOT transforms.

    'train' and 'trainall' shared byte-identical pipelines, as did 'val' and
    'test' (whose `or args.eval` test was unreachable after the 'val' branch);
    the duplicates are merged here with identical behavior.
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    if image_set in ('train', 'trainall') and not args.eval:
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([800, 1000, 1200]),
                    # T.RandomSizeCrop(384, 600),
                    T.RandomSizeCrop_MOT(800, 1200),
                    T.RandomResize(scales, max_size=1333),
                ])
            ),
            normalize,
        ])
    if image_set in ('val', 'test') or args.eval:
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def make_indoor360_transforms(image_set):
    # Simplified to only tensor conversion and normalization for every split.
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    if image_set in ('train', 'val', 'test'):
        return normalize
    raise ValueError(f'unknown {image_set}')
def make_kitti_transforms(image_set):
    """KITTI transforms: flip + normalize for 'train', normalize only for 'val'.

    Raises ValueError for any other image_set. (Fixes the misspelled
    'unknwon' in the error message.)
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            normalize,
        ])
    if image_set == 'val':
        return normalize
    raise ValueError(f'unknown {image_set}')
def make_support_transforms():
    """
    Transforms for support images during inference stage.
    For transforms of support images during training, please visit dataset.py and dataset_fewshot.py
    """
    # 512, 528, ..., 704 in steps of 16
    scales = list(range(512, 705, 16))
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomResize(scales, max_size=768),
        normalize,
    ])
def make_coco_transforms(image_set, args):
    """COCO transforms using this dataset's own channel statistics.

    args.eval short-circuits to the evaluation pipeline regardless of
    image_set. (Fixes the local name `max`, which shadowed the builtin.)
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.538, 0.494, 0.453], [0.257, 0.263, 0.273])
    ])
    scales = [
        480, 512, 544, 576, 608, 640, 672, 680, 690, 704, 736, 768, 788, 800
    ]
    test_size = 1100
    max_size = 1333  # was bound to `max`, shadowing the builtin
    if args.eval:
        return T.Compose([
            T.RandomResize([test_size], max_size=max_size),
            normalize,
        ])
    if image_set == 'train':
        return T.Compose([
            T.RandomSelect(
                T.RandomHorizontalFlip(),
                T.RandomVerticalFlip(),
            ),
            T.RandomSelect(
                T.RandomResize(scales, max_size=max_size),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                ])),
            T.ColorJitter(),
            normalize,
        ])
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([test_size], max_size=max_size),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def _set_transforms(self):
    """Decide transformations to data to be applied"""
    def _noncon_pipeline():
        # standard preprocessing followed by light spatial augmentation
        return [
            custom_transforms.ToFloat(),
            custom_transforms.ResampleTo(self.resample),
            custom_transforms.Normalize(input_bounds=(-500, 500),
                                        pixel_mean=0.379),
            custom_transforms.EltListToBlockTensor(),
            transforms.RandomAffine(degrees=15, translate=(0.05, 0.05)),
            transforms.RandomHorizontalFlip(p=0.5),
        ]

    builders = {'noncon': _noncon_pipeline}
    return {scan: transforms.Compose(build())
            for scan, build in builders.items()}
def make_default_transforms(image_set, crop=False):
    """Default DETR-style transforms; `crop=True` trains with a fixed 224 crop."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    if image_set == "train":
        if crop:
            ops = [
                T.RandomHorizontalFlip(),
                T.RandomResize([256, 324]),
                T.RandomSizeCrop(224, 224),
                normalize,
            ]
        else:
            ops = [
                T.RandomHorizontalFlip(),
                T.RandomSelect(
                    T.RandomResize(scales, max_size=1333),
                    T.Compose([
                        T.RandomResize([400, 500, 600]),
                        T.RandomSizeCrop(384, 600),
                        T.RandomResize(scales, max_size=1333),
                    ]),
                ),
                normalize,
            ]
        return T.Compose(ops)
    if image_set in ("test", "val"):
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    raise ValueError(f"unknown {image_set}")
def make_transforms():
    """
    Transforms for query images during the few-shot fine-tuning stage.
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    rescale_branch = T.RandomResize(scales, max_size=1152)
    crop_branch = T.Compose([
        T.RandomResize([400, 500, 600]),
        T.RandomSizeCrop(384, 600),
        T.RandomResize(scales, max_size=1152),
    ])
    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomColorJitter(p=0.3333),
        T.RandomSelect(rescale_branch, crop_branch),
        normalize,
    ])
def make_self_det_transforms(image_set):
    """Transforms for self-supervised detection pretraining."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # The image of ImageNet is relatively small, so the scales are modest.
    scales = [320, 336, 352, 368, 400, 416, 432, 448, 464, 480]
    if image_set == 'train':
        # No RandomHorizontalFlip: flipping may make the pretext task too
        # difficult, so it was removed.
        return T.Compose([T.RandomResize(scales, max_size=600), normalize])
    if image_set == 'val':
        return T.Compose([T.RandomResize([480], max_size=600), normalize])
    raise ValueError(f'unknown {image_set}')
def make_coco_transforms(args, image_set):
    """Transforms that pad every image to a fixed square size
    (args.train_size / args.val_size)."""
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            # DETR's multi-scale RandomSelect pipeline is disabled here in
            # favour of a single resize-crop plus fixed-size padding
            T.RandomResize_Crop(crop_range=[0.5, 1], max_size=args.train_size),
            T.PadToFix(args.train_size, position="random"),
            normalize,
        ])
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([args.val_size], max_size=args.val_size),
            T.PadToFix(args.val_size, position="start"),
            normalize,
        ])
    raise ValueError(f'unknown {image_set}')
def main(args):
    """Training entry point: builds dataset, model, optimizer and runs the
    epoch loop, checkpointing to args.output_dir."""
    utils.init_distributed_mode(args)
    print("git:\n {}\n".format(utils.get_sha()))

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)
    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # dataset paths come from a JSON config file
    Dataset = get_dataset(args.dataset, args.task)
    f = open(args.data_cfg)
    data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']
    f.close()

    # DETR-style train-time augmentation pipeline
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    transforms = T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomSelect(
            T.RandomResize(scales, max_size=1333),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                # T.RandomSizeCrop_MOT(384, 600),
                T.RandomResize(scales, max_size=1333),
            ])),
        normalize,
    ])
    dataset_train = Dataset(args, dataset_root, trainset_paths, (1088, 608),
                            augment=True, transforms=transforms)
    # number of identities is needed by the model's re-id head config
    args.nID = dataset_train.nID

    model, criterion, postprocessors = build_model(args)
    model.to(device)
    model_without_ddp = model

    # dataset_train = build_dataset(image_set='train', args=args)
    # dataset_val = build_dataset(image_set='val', args=args)
    if args.distributed:
        if args.cache_mode:
            sampler_train = samplers.NodeDistributedSampler(dataset_train)
            # sampler_val = samplers.NodeDistributedSampler(dataset_val, shuffle=False)
        else:
            sampler_train = samplers.DistributedSampler(dataset_train)
            # sampler_val = samplers.DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        # sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(sampler_train,
                                                        args.batch_size,
                                                        drop_last=True)
    data_loader_train = DataLoader(dataset_train,
                                   batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn,
                                   num_workers=args.num_workers,
                                   pin_memory=True)
    # data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
    #                              drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers,
    #                              pin_memory=True)
    # data_loader_train = torch.utils.data.DataLoader(
    #     dataset_train,
    #     batch_size=args.batch_size,
    #     shuffle=True,
    #     num_workers=args.num_workers,
    #     pin_memory=True,
    #     drop_last=True
    # )

    # lr_backbone_names = ["backbone.0", "backbone.neck", "input_proj", "transformer.encoder"]
    def match_name_keywords(n, name_keywords):
        # True if any of the keywords occurs in parameter name n.
        out = False
        for b in name_keywords:
            if b in n:
                out = True
                break
        return out

    for n, p in model_without_ddp.named_parameters():
        print(n)

    # keep the classifier parameters from being updated
    # for name,p in model_without_ddp.named_parameters():
    #     if name.startswith('classifier'):
    #         p.requires_grad = False

    # Three parameter groups with separate learning rates:
    # main body, backbone, and linear-projection layers.
    param_dicts = [{
        "params": [
            p for n, p in model_without_ddp.named_parameters()
            if not match_name_keywords(n, args.lr_backbone_names)
            and not match_name_keywords(n, args.lr_linear_proj_names)
            and p.requires_grad
        ],
        "lr": args.lr,
    }, {
        "params": [
            p for n, p in model_without_ddp.named_parameters()
            if match_name_keywords(n, args.lr_backbone_names)
            and p.requires_grad
        ],
        "lr": args.lr_backbone,
    }, {
        "params": [
            p for n, p in model_without_ddp.named_parameters()
            if match_name_keywords(n, args.lr_linear_proj_names)
            and p.requires_grad
        ],
        "lr": args.lr * args.lr_linear_proj_mult,
    }]

    if args.sgd:
        optimizer = torch.optim.SGD(param_dicts, lr=args.lr, momentum=0.9,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                      weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
    # optimizer.add_param_group({'params': criterion.parameters()})

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location='cpu')
        model_without_ddp.detr.load_state_dict(checkpoint['model'])

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(args.resume,
                                                            map_location='cpu',
                                                            check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        model_dict = model_without_ddp.state_dict()  # current model parameters
        # drop the classification-head weights so a checkpoint with a
        # different number of classes can still be loaded
        pretrained_dict = {
            k: v
            for k, v in checkpoint['model'].items() if k not in [
                "class_embed.0.weight", "class_embed.0.bias",
                "class_embed.1.weight", "class_embed.1.bias",
                "class_embed.2.weight", "class_embed.2.bias",
                "class_embed.3.weight", "class_embed.3.bias",
                "class_embed.4.weight", "class_embed.4.bias",
                "class_embed.5.weight", "class_embed.5.bias"
            ]
        }
        model_dict.update(pretrained_dict)
        # missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(
            model_dict, strict=False)
        unexpected_keys = [
            k for k in unexpected_keys
            if not (k.endswith('total_params') or k.endswith('total_ops'))
        ]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            args.start_epoch = checkpoint['epoch'] + 1
            # optimizer.load_state_dict(checkpoint['optimizer'])
        # if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
        #     import copy
        #     p_groups = copy.deepcopy(optimizer.param_groups)
        #     # optimizer.load_state_dict(checkpoint['optimizer'])
        #     for pg, pg_old in zip(optimizer.param_groups, p_groups):
        #         pg['lr'] = pg_old['lr']
        #         pg['initial_lr'] = pg_old['initial_lr']
        #     # print(optimizer.param_groups)
        #     lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        #     # todo: this is a hack for doing experiment that resume from checkpoint and also modify lr scheduler (e.g., decrease lr in advance).
        #     args.override_resumed_lr_drop = True
        #     if args.override_resumed_lr_drop:
        #         print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.')
        #         lr_scheduler.step_size = args.lr_drop
        #         lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
        #     lr_scheduler.step(lr_scheduler.last_epoch)

    # model.add_module('id')
    # [p for p in model.named_parameters() if not p[1].requires_grad]
    # keep the classifier parameters from being updated
    # optimizer = torch.optim.SGD(filter(lambda x: "classifier" not in x[0], model.parameters()), lr=args.lr,
    #                             momentum=0.9, weight_decay=1e-4)
    # model.classifier.training = False

    n_parameters = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    print('number of params:', n_parameters)

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(args, model, criterion,
                                      data_loader_train, optimizer, device,
                                      epoch, args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every 5 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 5 == 0:
                checkpoint_paths.append(output_dir /
                                        f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master(
                    {
                        'model': model_without_ddp.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'lr_scheduler': lr_scheduler.state_dict(),
                        'epoch': epoch,
                        'args': args,
                    }, checkpoint_path)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
def make_bat_transforms(image_set, custom_backbone):
    """Transforms for the bat dataset; train and test use split-specific
    normalization statistics.

    Raises ValueError for any other image_set (previously fell through and
    returned None, deferring the failure to the caller).
    """
    if image_set == 'train':
        return T.Compose([
            T.ToTensor(),
            Resize((256, 512)),
            T.Normalize([0.058526332422855], [0.1667903737826997],
                        custom_backbone)
        ])
    if image_set == 'test':
        return T.Compose([
            T.ToTensor(),
            Resize((256, 512)),
            T.Normalize([0.050141380321473965], [0.3160132308495623],
                        custom_backbone)
        ])
    raise ValueError(f'unknown {image_set}')
## transforms.ToTensor(), ## normalize])) ## test_loader = data.DataLoader(testset,batch_size=args.test_batch_size, shuffle=False, ## num_workers=args.workers, pin_memory=True) # #test_loader = data.DataLoader( # datasets.ImageFolder(valdir, transforms.Compose([ # transforms.Resize(256), # transforms.CenterCrop(224), # transforms.ToTensor(), # normalize, # ]),Train = False), # batch_size=args.test_batch_size, shuffle=False, # num_workers=args.workers, pin_memory=True) input_size = 224 normalize = transforms.Normalize(meanfile=args.data + '/imagenet_mean.binaryproto') train_dataset = datasets.ImageFolder( args.data, transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize, transforms.RandomSizedCrop(input_size), ]), Train=True) #if args.distributed: # train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) #else: train_sampler = None
def main():
    """Train/evaluate AlexNet on ImageNet-style data; optionally distributed."""
    global args, best_prec1
    args = parser.parse_args()
    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227  # AlexNet input resolution
    else:
        raise Exception('Model not supported yet')

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            # parallelize only the conv features for these architectures
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code — requires the Caffe-style mean file
    if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
        print("==> Data directory" + args.data + "does not exits")
        print("==> Please specify the correct data path by")
        print("==> --data <DATA_PATH>")
        return

    normalize = transforms.Normalize(
        meanfile=args.data + '/imagenet_mean.binaryproto')

    train_dataset = datasets.ImageFolder(
        args.data,
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
            transforms.RandomSizedCrop(input_size),
        ]),
        Train=True)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.data, transforms.Compose([
            transforms.ToTensor(),
            normalize,
            transforms.CenterCrop(input_size),
        ]), Train=False),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # NOTE(review): Python 2 print statement — this file predates Python 3.
    print model

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)
def main():
    """Train/evaluate a binarized AlexNet, using either the Caffe-style or
    the torchvision dataset pipeline depending on args.caffe_data."""
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227  # AlexNet input resolution
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        # parallelize only the conv features for these architectures
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 weight_decay=args.weight_decay)

    # custom re-initialization of conv/linear/batchnorm weights
    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.caffe_data:
        print('==> Using Caffe Dataset')
        # use the project-local Caffe-style datasets/transforms modules
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory" + args.data + "does not exits")
            print("==> Please specify the correct data path by")
            print("==> --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(meanfile=args.data +
                                         '/imagenet_mean.binaryproto')
        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)
        train_sampler = None
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)

    # NOTE(review): Python 2 print statement — this file predates Python 3.
    print model

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)