def make_clevr_transforms(image_set, cautious=False):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [256, 288, 320, 352, 384]

    if image_set == "train":
        horizontal = [] if cautious else [T.RandomHorizontalFlip()]
        return T.Compose(horizontal + [
            T.RandomSelect(
                T.RandomResize(scales, max_size=512),
                T.Compose([
                    T.RandomResize([320, 352, 384]),
                    T.RandomSizeCrop(256, 512, respect_boxes=cautious),
                    T.RandomResize(scales, max_size=512),
                ]),
            ),
            normalize,
        ])

    if image_set == "val":
        return T.Compose([
            # T.RandomResize([480], max_size=1333),
            normalize,
        ])

    raise ValueError(f"unknown {image_set}")
def make_coco_transforms(image_set):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
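# Hedged usage sketch: wiring the pipeline above into a DETR-style dataset.
# `CocoDetection` (with a `return_masks` flag, as in DETR's datasets/coco.py)
# and the directory layout below are assumptions, not part of this file.
from pathlib import Path

def build_coco(image_set: str, coco_root: str):
    root = Path(coco_root)
    img_folder = root / f'{image_set}2017'
    ann_file = root / 'annotations' / f'instances_{image_set}2017.json'
    return CocoDetection(img_folder, ann_file,
                         transforms=make_coco_transforms(image_set),
                         return_masks=False)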
def make_support_transforms():
    """
    Transforms for support images during the training phase.
    For transforms for support images during inference, please check dataset_support.py
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608, 624, 640, 656, 672]

    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomColorJitter(p=0.25),
        T.RandomSelect(
            T.RandomResize(scales, max_size=672),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                T.RandomResize(scales, max_size=672),
            ])),
        normalize,
    ])
def make_coco_transforms(image_set):
    # T is the project's datasets/transforms.py module. The augmentation ops
    # below are implemented there much like their torchvision.transforms
    # counterparts: ToTensor() moves the channel dimension to the front and
    # scales pixel values into [0, 1], while Normalize() standardizes the
    # image with the given mean/std and also converts the target bboxes to
    # (cx, cy, w, h) form, normalized to [0, 1]. We do not analyze it further
    # here; interested readers can refer to the source code.
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
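# A minimal standalone sketch of the box handling described in the comment
# above, assuming xyxy pixel boxes; the project's T.Normalize performs the
# equivalent conversion on the target dict.
import torch

def box_xyxy_to_cxcywh_norm(boxes: torch.Tensor, h: int, w: int) -> torch.Tensor:
    """[x0, y0, x1, y1] pixel boxes -> [cx, cy, w, h] normalized to [0, 1]."""
    x0, y0, x1, y1 = boxes.unbind(-1)
    out = torch.stack([(x0 + x1) / 2, (y0 + y1) / 2, x1 - x0, y1 - y0], dim=-1)
    return out / torch.tensor([w, h, w, h], dtype=torch.float32)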
def make_yolo_transforms(image_set):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    if image_set == 'train':
        return T.Compose([
            T.RandomShuffle(keep=0.7),
            T.RandomSelect(
                T.RandomRotation(180),
                T.RandomGrayscale(p=0.07),
            ),
            T.RandomSelect(
                T.RandomSizeCrop(384, 600),
                T.RandomGrayscale(p=0.07),
            ),
            T.RandomResize([400, 450, 500, 550, 600, 650]),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([T.RandomResize([640]), normalize])

    raise ValueError(f'unknown {image_set}')
def make_coco_transforms(image_set, cautious):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    max_size = 1333

    if image_set == "train":
        horizontal = [] if cautious else [T.RandomHorizontalFlip()]
        return T.Compose(horizontal + [
            T.RandomSelect(
                T.RandomResize(scales, max_size=max_size),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, max_size, respect_boxes=cautious),
                    T.RandomResize(scales, max_size=max_size),
                ]),
            ),
            normalize,
        ])

    if image_set == "val":
        return T.Compose([
            T.RandomResize([800], max_size=max_size),
            normalize,
        ])

    raise ValueError(f"unknown {image_set}")
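# Rough standalone sketch (an assumption, not this project's code) of the
# behavior requested above via respect_boxes=cautious: retry random crops
# until no ground-truth box is dropped, as in MDETR-style transforms. `crop`
# stands in for the project's functional crop helper and is assumed here.
import random
import torchvision.transforms as TV

def random_size_crop_respect_boxes(img, target, min_size, max_size, patience=100):
    init_boxes = len(target["boxes"])
    for i in range(patience):
        w = random.randint(min_size, min(img.width, max_size))
        h = random.randint(min_size, min(img.height, max_size))
        region = TV.RandomCrop.get_params(img, [h, w])
        result_img, result_target = crop(img, target, region)
        # accept the crop only if every original box survived, or on the last try
        if len(result_target["boxes"]) == init_boxes or i == patience - 1:
            return result_img, result_target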
def make_coco_transforms(image_set):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomResize(scales, max_size=800),
            T.PhotometricDistort(),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                # To suit the GPU memory the scale might be different
                T.RandomResize([300], max_size=540),   # for r50
                # T.RandomResize([280], max_size=504), # for r101
            ]),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def initialise(args):
    # Load model and loss function
    detr, criterion, postprocessors = build_model(args)
    class_embed = torch.nn.Linear(256, 81, bias=True)
    if os.path.exists(args.pretrained):
        print(f"Load pre-trained model from {args.pretrained}")
        detr.load_state_dict(torch.load(args.pretrained)['model_state_dict'])
    # Keep the 80 populated COCO category ids (the 91-id scheme has gaps)
    # plus the final "no object" slot, giving an 81-way head
    w, b = detr.class_embed.state_dict().values()
    keep = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
        43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84,
        85, 86, 87, 88, 89, 90, 91
    ]
    class_embed.load_state_dict(dict(weight=w[keep], bias=b[keep]))
    detr.class_embed = class_embed
    if os.path.exists(args.resume):
        print(f"Resume from model at {args.resume}")
        detr.load_state_dict(torch.load(args.resume)['model_state_dict'])

    # Prepare dataset transforms
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    if args.partition == 'train2015':
        transforms = T.Compose([
            T.RandomHorizontalFlip(),
            T.ColorJitter(.4, .4, .4),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])
    elif args.partition == 'test2015':
        transforms = T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
    else:
        raise ValueError(f"unknown partition {args.partition}")

    # Load dataset
    dataset = HICODetObject(
        pocket.data.HICODet(
            root=os.path.join(args.data_root,
                              f'hico_20160224_det/images/{args.partition}'),
            anno_file=os.path.join(args.data_root,
                                   f'instances_{args.partition}.json'),
            target_transform=pocket.ops.ToTensor(input_format='dict')),
        transforms)

    return detr, criterion, postprocessors['bbox'], dataset
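# Standalone sketch of the head-slicing trick used in initialise() above,
# assuming the reference DETR COCO checkpoint whose classifier has 92 logits
# (91 padded category ids + "no object"). The toy index list is illustrative;
# the real `keep` list above has 81 entries.
import torch

old_head = torch.nn.Linear(256, 92)        # 91 padded ids + "no object"
toy_keep = [1, 2, 3, 91]                   # toy subset of the ids to retain
new_head = torch.nn.Linear(256, len(toy_keep), bias=True)
w, b = old_head.state_dict().values()
new_head.load_state_dict(dict(weight=w[toy_keep], bias=b[toy_keep]))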
def make_coco_transforms(image_set):
    """COCO dataset preprocessing.

    Parameters
    ----------
    image_set : str
        "train" or "val"

    Returns
    -------
    result : T.Compose
        The image transform operation list.
    """
    # -------------------
    # normalize the image
    # -------------------
    normalize = T.Compose([
        T.ToTensor(),
        # per-channel (R, G, B) means and stds used for normalization
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    # -------------------------------------------------
    # determine the preprocessing steps of the image
    # according to the image set type
    # -------------------------------------------------
    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def make_default_transforms():
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    return T.Compose([
        T.RandomResize([800], max_size=1333),
        normalize,
    ])
def make_coco_transforms(image_set, args):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.538, 0.494, 0.453], [0.257, 0.263, 0.273])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 680, 690, 704, 736, 768, 788, 800]
    test_size = 1100
    max_size = 1333

    if args.eval:
        return T.Compose([
            T.RandomResize([test_size], max_size=max_size),
            normalize,
        ])
    else:
        if image_set == 'train':
            return T.Compose([
                T.RandomSelect(
                    T.RandomHorizontalFlip(),
                    T.RandomVerticalFlip(),
                ),
                T.RandomSelect(
                    T.RandomResize(scales, max_size=max_size),
                    T.Compose([
                        T.RandomResize([400, 500, 600]),
                        T.RandomSizeCrop(384, 600),
                        T.RandomResize(scales, max_size=max_size),
                    ])),
                T.ColorJitter(),
                normalize,
            ])
        if image_set == 'val':
            return T.Compose([
                T.RandomResize([test_size], max_size=max_size),
                normalize,
            ])

    raise ValueError(f'unknown {image_set}')
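# Hedged sketch (not from the source) of how non-ImageNet statistics like the
# mean/std above could be estimated once over the training images and then
# hard-coded. Assumes a loader yielding (B, 3, H, W) float tensors in [0, 1],
# all of equal spatial size.
import torch

def channel_stats(loader):
    n, mean, sq = 0, torch.zeros(3), torch.zeros(3)
    for images, _ in loader:
        b = images.size(0)
        flat = images.view(b, 3, -1)
        mean += flat.mean(dim=2).sum(dim=0)
        sq += flat.pow(2).mean(dim=2).sum(dim=0)
        n += b
    mean /= n
    std = (sq / n - mean ** 2).sqrt()   # Var[x] = E[x^2] - E[x]^2
    return mean, std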
def make_default_transforms(image_set, crop=False):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # normalize = T.Compose([T.ToTensor()])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    if image_set == "train":
        if not crop:
            trans = [
                T.RandomHorizontalFlip(),
                T.RandomSelect(
                    T.RandomResize(scales, max_size=1333),
                    T.Compose([
                        T.RandomResize([400, 500, 600]),
                        T.RandomSizeCrop(384, 600),
                        T.RandomResize(scales, max_size=1333),
                    ]),
                ),
                normalize,
            ]
        else:
            trans = [
                T.RandomHorizontalFlip(),
                T.RandomResize([256, 324]),
                T.RandomSizeCrop(224, 224),
                normalize,
            ]
        return T.Compose(trans)

    if image_set in ["test", "val"]:
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f"unknown {image_set}")
def make_mot_transforms(image_set, args):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    # 'train' and 'trainall' share the same augmentation pipeline
    if image_set in ('train', 'trainall') and not args.eval:
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([800, 1000, 1200]),
                    # T.RandomSizeCrop(384, 600),
                    T.RandomSizeCrop_MOT(800, 1200),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    # 'val' and 'test' (or any eval run) use the same deterministic resize
    if image_set in ('val', 'test') or args.eval:
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def make_self_det_transforms(image_set):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # The images of ImageNet are relatively small.
    scales = [320, 336, 352, 368, 400, 416, 432, 448, 464, 480]

    if image_set == 'train':
        return T.Compose([
            # T.RandomHorizontalFlip(),
            # horizontal flipping may make the pretext task too difficult,
            # so we remove it
            T.RandomResize(scales, max_size=600),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([480], max_size=600),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def make_transforms():
    """
    Transforms for query images during the few-shot fine-tuning stage.
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomColorJitter(p=0.3333),
        T.RandomSelect(
            T.RandomResize(scales, max_size=1152),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                T.RandomResize(scales, max_size=1152),
            ])),
        normalize,
    ])
def make_support_transforms():
    """
    Transforms for support images during the inference stage.
    For transforms of support images during training, please visit
    dataset.py and dataset_fewshot.py
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [512, 528, 544, 560, 576, 592, 608, 624, 640, 656, 672, 688, 704]

    return T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomResize(scales, max_size=768),
        normalize,
    ])
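# Hedged usage sketch tying the few-shot helpers together. These pipelines
# follow the DETR (image, target) calling convention; the file path and the
# empty target below are illustrative, not from the source.
from PIL import Image
import torch

query_tf = make_transforms()               # query images, fine-tuning stage
support_tf = make_support_transforms()     # support images, inference stage

image = Image.open("support.jpg").convert("RGB")   # hypothetical path
target = {"boxes": torch.zeros((0, 4)),
          "labels": torch.zeros(0, dtype=torch.int64)}
image, target = support_tf(image, target)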
def make_coco_transforms(args, image_set):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768]
    # max_size = 512

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            # T.RandomSelect(
            #     T.RandomResize(scales, max_size=max_size),
            #     T.Compose([
            #         T.RandomResize([400, 500, 600]),
            #         T.RandomSizeCrop(384, 600),
            #         T.RandomResize(scales, max_size=max_size),
            #     ])
            # ),
            T.RandomResize_Crop(crop_range=[0.5, 1], max_size=args.train_size),
            # T.RandomResize([400, 500, 600]),
            # T.RandomSizeCrop(384, 600),
            # T.RandomResize(scales, max_size=512),
            T.PadToFix(args.train_size, position="random"),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([args.val_size], max_size=args.val_size),
            T.PadToFix(args.val_size, position="start"),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def make_voc_transforms(image_set, args):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    if image_set == 'train':
        if args.backbone == 'vit':
            return T.Compose([
                T.RandomHorizontalFlip(),
                T.RandomResize([(384, 384)], max_size=384),
                normalize,
            ])
        else:
            return T.Compose([
                T.RandomHorizontalFlip(),
                T.RandomSelect(
                    T.RandomResize([400, 500, 600], max_size=1000),
                    T.Compose([
                        T.RandomResize([400, 500, 600]),
                        T.RandomCrop((384, 384)),
                        T.RandomResize([400, 500, 600], max_size=1000),
                    ])),
                normalize,
            ])

    if image_set == 'val':
        if args.backbone == 'vit':
            # no random flipping at evaluation time
            return T.Compose([
                T.RandomResize([(384, 384)], max_size=384),
                normalize,
            ])
        else:
            return T.Compose([
                T.RandomResize([600], max_size=1000),
                normalize,
            ])

    raise ValueError(f'unknown {image_set}')
def eval_seq(opt, dataloader, data_type, result_filename,
             save_dir=None, show_image=True, frame_rate=30):
    if save_dir:
        mkdir_if_missing(save_dir)
    tracker = JDETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    len_all = len(dataloader)
    start_frame = int(len_all / 2)
    frame_id = int(len_all / 2)

    # Build the preprocessing pipeline once, outside the frame loop
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    preprocess = transforms.Compose(
        [T.RandomResize([800], max_size=1333), normalize])

    for i, (path, img, img0) in enumerate(dataloader):
        if i < start_frame:
            continue
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))

        img_pil = Image.open(path).convert('RGB')
        img_norm = preprocess(img_pil)

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)  # note: unused below
        online_targets = tracker.update(img_norm.cuda().unsqueeze(0), img0)
        online_tlwhs = []
        online_ids = []
        # online_scores = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
                # online_scores.append(t.score)
        timer.toc()

        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        # results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
        if show_image or save_dir is not None:
            online_im = vis.plot_tracking(img0, online_tlwhs, online_ids,
                                          frame_id=frame_id,
                                          fps=1. / timer.average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(
                os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                online_im)
        frame_id += 1

    # save results
    write_results(result_filename, results, data_type)
    # write_results_score(result_filename, results, data_type)
    return frame_id, timer.average_time, timer.calls
def main(args):
    utils.init_distributed_mode(args)
    print("git:\n {}\n".format(utils.get_sha()))

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    Dataset = get_dataset(args.dataset, args.task)
    with open(args.data_cfg) as f:
        data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']

    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    transforms = T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomSelect(
            T.RandomResize(scales, max_size=1333),
            T.Compose([
                T.RandomResize([400, 500, 600]),
                T.RandomSizeCrop(384, 600),
                # T.RandomSizeCrop_MOT(384, 600),
                T.RandomResize(scales, max_size=1333),
            ])),
        normalize,
    ])
    dataset_train = Dataset(args, dataset_root, trainset_paths, (1088, 608),
                            augment=True, transforms=transforms)
    args.nID = dataset_train.nID

    model, criterion, postprocessors = build_model(args)
    model.to(device)
    model_without_ddp = model

    # dataset_train = build_dataset(image_set='train', args=args)
    # dataset_val = build_dataset(image_set='val', args=args)

    if args.distributed:
        if args.cache_mode:
            sampler_train = samplers.NodeDistributedSampler(dataset_train)
            # sampler_val = samplers.NodeDistributedSampler(dataset_val, shuffle=False)
        else:
            sampler_train = samplers.DistributedSampler(dataset_train)
            # sampler_val = samplers.DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        # sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)
    data_loader_train = DataLoader(dataset_train,
                                   batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn,
                                   num_workers=args.num_workers,
                                   pin_memory=True)
    # data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
    #                              drop_last=False, collate_fn=utils.collate_fn,
    #                              num_workers=args.num_workers, pin_memory=True)

    # data_loader_train = torch.utils.data.DataLoader(
    #     dataset_train,
    #     batch_size=args.batch_size,
    #     shuffle=True,
    #     num_workers=args.num_workers,
    #     pin_memory=True,
    #     drop_last=True
    # )

    # lr_backbone_names = ["backbone.0", "backbone.neck", "input_proj", "transformer.encoder"]
    def match_name_keywords(n, name_keywords):
        out = False
        for b in name_keywords:
            if b in n:
                out = True
                break
        return out

    for n, p in model_without_ddp.named_parameters():
        print(n)

    # Used to stop the classifier parameters from being updated
    # for name, p in model_without_ddp.named_parameters():
    #     if name.startswith('classifier'):
    #         p.requires_grad = False

    param_dicts = [
        {
            "params": [
                p for n, p in model_without_ddp.named_parameters()
                if not match_name_keywords(n, args.lr_backbone_names)
                and not match_name_keywords(n, args.lr_linear_proj_names)
                and p.requires_grad
            ],
            "lr": args.lr,
        },
        {
            "params": [
                p for n, p in model_without_ddp.named_parameters()
                if match_name_keywords(n, args.lr_backbone_names)
                and p.requires_grad
            ],
            "lr": args.lr_backbone,
        },
        {
            "params": [
                p for n, p in model_without_ddp.named_parameters()
                if match_name_keywords(n, args.lr_linear_proj_names)
                and p.requires_grad
            ],
            "lr": args.lr * args.lr_linear_proj_mult,
        },
    ]
    if args.sgd:
        optimizer = torch.optim.SGD(param_dicts, lr=args.lr, momentum=0.9,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                      weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
    # optimizer.add_param_group({'params': criterion.parameters()})

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location='cpu')
        model_without_ddp.detr.load_state_dict(checkpoint['model'])

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        model_dict = model_without_ddp.state_dict()  # current model parameters
        pretrained_dict = {
            k: v for k, v in checkpoint['model'].items() if k not in [
                "class_embed.0.weight", "class_embed.0.bias",
                "class_embed.1.weight", "class_embed.1.bias",
                "class_embed.2.weight", "class_embed.2.bias",
                "class_embed.3.weight", "class_embed.3.bias",
                "class_embed.4.weight", "class_embed.4.bias",
                "class_embed.5.weight", "class_embed.5.bias"
            ]
        }
        model_dict.update(pretrained_dict)
        # missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(
            model_dict, strict=False)
        unexpected_keys = [
            k for k in unexpected_keys
            if not (k.endswith('total_params') or k.endswith('total_ops'))
        ]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
        if (not args.eval and 'optimizer' in checkpoint
                and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint):
            args.start_epoch = checkpoint['epoch'] + 1
        # optimizer.load_state_dict(checkpoint['optimizer'])
        # if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
        #     import copy
        #     p_groups = copy.deepcopy(optimizer.param_groups)
        #     optimizer.load_state_dict(checkpoint['optimizer'])
        #     for pg, pg_old in zip(optimizer.param_groups, p_groups):
        #         pg['lr'] = pg_old['lr']
        #         pg['initial_lr'] = pg_old['initial_lr']
        #     # print(optimizer.param_groups)
        #     lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        #     # todo: this is a hack for experiments that resume from a checkpoint
        #     # and also modify the lr scheduler (e.g., decrease lr in advance).
        #     args.override_resumed_lr_drop = True
        #     if args.override_resumed_lr_drop:
        #         print('Warning: (hack) args.override_resumed_lr_drop is set to True, '
        #               'so args.lr_drop will override the lr_drop in the resumed lr_scheduler.')
        #         lr_scheduler.step_size = args.lr_drop
        #         lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
        #     lr_scheduler.step(lr_scheduler.last_epoch)

    # model.add_module('id')
    # [p for p in model.named_parameters() if not p[1].requires_grad]
    # Used to stop the classifier parameters from being updated
    # optimizer = torch.optim.SGD(filter(lambda x: "classifier" not in x[0], model.parameters()),
    #                             lr=args.lr, momentum=0.9, weight_decay=1e-4)
    # model.classifier.training = False

    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(args, model, criterion,
                                      data_loader_train, optimizer, device,
                                      epoch, args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every 5 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 5 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master(
                    {
                        'model': model_without_ddp.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'lr_scheduler': lr_scheduler.state_dict(),
                        'epoch': epoch,
                        'args': args,
                    }, checkpoint_path)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))