def __init__(self, attr_lens, sample_width, sample_height, sample_duration,
             model_type="tp", **kwargs):
    super(AttrResNet503D, self).__init__()
    # 3D ResNet-50 backbone; drop the last two children (pooling and fc head).
    resnet50 = resnet3d.resnet50(sample_width=sample_width,
                                 sample_height=sample_height,
                                 sample_duration=sample_duration)
    self.base = nn.Sequential(*list(resnet50.children())[:-2])
    self.feature_dim = 512
    # Two multi-label heads: one for identity-related attributes,
    # one for identity-unrelated attributes.
    self.idrelated_classifier = MultiLabelLinearAttributeModule(
        self.feature_dim, attr_lens[0])
    self.idunrelated_classifier = MultiLabelLinearAttributeModule(
        self.feature_dim, attr_lens[1])
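# A minimal instantiation sketch (an assumption, not from the original source):
# attr_lens is taken to be a pair of lists giving the number of classes per
# identity-related / identity-unrelated attribute; the values are hypothetical
# and the real structure comes from the dataset code.
attr_lens = [[5, 6, 2], [9, 10, 2]]  # [id-related class counts, id-unrelated class counts]
model = AttrResNet503D(attr_lens, sample_width=112, sample_height=112,
                       sample_duration=16)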
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        VideoDataset(dataset.train, seq_len=args.seq_len, sample='random',
                     transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=args.width,
                                  sample_height=args.height,
                                  sample_duration=args.seq_len)
        if not os.path.exists(args.pretrained_model):
            raise IOError("Can't find pretrained model: {}".format(
                args.pretrained_model))
        print("Loading checkpoint from '{}'".format(args.pretrained_model))
        checkpoint = torch.load(args.pretrained_model)
        state_dict = {}
        for key in checkpoint['state_dict']:
            if 'fc' in key:
                continue
            state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        model = models.init_model(name=args.arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize,
                                        gamma=args.gamma)
    start_epoch = args.start_epoch

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        return

    start_time = time.time()
    best_rank1 = -np.inf
    if args.arch == 'resnet503d':
        torch.backends.cudnn.benchmark = False

    for epoch in range(start_epoch, args.max_epoch):
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, criterion_xent, criterion_htri, optimizer, trainloader,
              use_gpu)
        if args.stepsize > 0:
            scheduler.step()

        if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or \
                (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir,
                                 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
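# save_checkpoint above is defined in the project's utilities. A minimal sketch
# of what such a helper typically does (an assumption following the common
# torchreid-style convention, not necessarily this project's implementation):
import os
import os.path as osp
import shutil
import torch

def save_checkpoint_sketch(state, is_best, fpath):
    # Persist the full training state so a run can resume from state['epoch'].
    os.makedirs(osp.dirname(fpath), exist_ok=True)
    torch.save(state, fpath)
    if is_best:
        # Keep a separate copy of the best-performing weights alongside it.
        shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar'))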
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    if args.arch == 'resnet503d':
        cudnn.benchmark = False

    print("Initializing model: {}".format(args.arch))
    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=args.width,
                                  sample_height=args.height,
                                  sample_duration=args.seq_len)
    else:
        model = models.init_model(name=args.arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})

    # Both branches load the best checkpoint identically, so do it once here.
    if not os.path.exists(args.best_model):
        raise IOError("Can't find best model: {}".format(args.best_model))
    print("Loading checkpoint from '{}'".format(args.best_model))
    checkpoint = torch.load(args.best_model)
    model.load_state_dict(checkpoint['state_dict'], strict=False)

    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        # distmat = test(model, queryloader, galleryloader, args.pool, use_gpu)
        # Can't do this with the RNN model, or it runs out of memory.
        # if args.vis_ranked_res:
        #     visualize_ranked_results(
        #         distmat, dataset,
        #         save_dir=osp.join(args.save_dir, 'ranked_results'),
        #         topk=20,
        #     )
        return
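# The commented-out block above notes that materializing the full distance
# matrix can exhaust memory. A generic sketch (an assumption, not this
# project's code) that computes pairwise Euclidean distances in row chunks to
# bound peak memory:
import torch

def chunked_euclidean_distmat(qf, gf, chunk=128):
    # qf: (n_query, d) query features; gf: (n_gallery, d) gallery features.
    rows = []
    for start in range(0, qf.size(0), chunk):
        # Each step only allocates a (chunk, n_gallery) slice at a time.
        rows.append(torch.cdist(qf[start:start + chunk], gf, p=2))
    return torch.cat(rows, dim=0)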
def testseq(dataset_name, use_gpu):
    dataset_root = './video2img/track1_sct_img_test_big/'
    dataset = Graph_data_manager.AICityTrack2(root=dataset_root)

    width = 224
    height = 224
    transform_train = T.Compose([
        T.Random2DTranslation(height, width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((height, width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    seq_len = 4
    num_instance = 4
    train_batch = 32
    test_batch = 1

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=seq_len, sample='dense',
                     transform=transform_test),
        batch_size=test_batch, shuffle=False, num_workers=4,
        pin_memory=pin_memory, drop_last=False,
    )

    arch = "resnet50ta"
    pretrained_model = "./log/track12_ta224_checkpoint_ep500.pth.tar"
    start_epoch = 0

    print("Initializing model: {}".format(arch))
    dataset.num_train_pids = 517
    if arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=width, sample_height=height,
                                  sample_duration=seq_len)
        if not os.path.exists(pretrained_model):
            raise IOError(
                "Can't find pretrained model: {}".format(pretrained_model))
        print("Loading checkpoint from '{}'".format(pretrained_model))
        checkpoint = torch.load(pretrained_model)
        state_dict = {}
        for key in checkpoint['state_dict']:
            if 'fc' in key:
                continue
            # Strip the 'module.' prefix added by nn.DataParallel.
            state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        # The model is built the same way in either case; the checkpoint is
        # only loaded when it exists on disk.
        model = models.init_model(name=arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
        if os.path.exists(pretrained_model):
            checkpoint = torch.load(pretrained_model)
            model.load_state_dict(checkpoint['state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            print("Loaded checkpoint from '{}'".format(pretrained_model))
            print("- start_epoch: {}\n- rank1: {}".format(
                start_epoch, checkpoint['rank1']))

    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=0.3)

    lr = 0.0003
    gamma = 0.1
    stepsize = 200
    weight_decay = 5e-04
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    if stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=stepsize,
                                        gamma=gamma)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    test(model, queryloader, 'avg', use_gpu, dataset, -1, meta_data_tab=None)
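# The 3D-ResNet checkpoint loaders above strip the 'module.' prefix that
# nn.DataParallel prepends to parameter names. A standalone sketch of that
# pattern, tolerant of checkpoints saved with or without DataParallel:
def strip_dataparallel_prefix(state_dict):
    # partition("module.")[2] is the remainder after the prefix; keys without
    # the prefix are passed through unchanged.
    return {(k.partition("module.")[2] if k.startswith("module.") else k): v
            for k, v in state_dict.items()}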
def resnet50(config):
    return resnet3d.resnet50(sample_input_D=128,
                             sample_input_H=128,
                             sample_input_W=128,
                             num_seg_classes=config.num_classes,
                             shortcut_type='B')
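# Hypothetical usage sketch: the factory above only reads config.num_classes,
# so any object carrying that attribute works (the value here is illustrative).
from types import SimpleNamespace

config = SimpleNamespace(num_classes=2)
model = resnet50(config)  # 128x128x128 input volume, type-'B' shortcuts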
def main():
    torch.manual_seed(args.seed)  # Seed the CPU RNG so results are reproducible.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices  # Select which GPUs are visible to the process.
    use_gpu = torch.cuda.is_available()  # True if the current environment supports CUDA, else False.
    if args.use_cpu:
        use_gpu = False

    # Not evaluating means training, so write the training log; otherwise write the test log.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))  # Print all arguments.

    if use_gpu:  # When using the GPU, report which devices were selected.
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True  # Enabling this at startup can speed training up slightly at essentially no cost.
        torch.cuda.manual_seed_all(args.seed)  # Seed all GPU RNGs so results are reproducible.
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)  # Initialize the dataset, loaded via data_manager.py.

    # import transforms as T; T.Compose chains several transforms together.
    transform_train = T.Compose([
        # With some probability, first enlarge the image by a factor of (1 + 1/8), then take a random crop.
        T.Random2DTranslation(args.height, args.width),
        # Randomly flip the given PIL image horizontally with probability 0.5.
        T.RandomHorizontalFlip(),
        # Convert a PIL Image or numpy.ndarray to a tensor.
        T.ToTensor(),
        # Normalize the tensor image with mean and standard deviation:
        # input[channel] = (input[channel] - mean[channel]) / std[channel]
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),  # Resize the input PIL image to the given size.
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # With pin_memory=True, tensors are allocated in page-locked (pinned) host
    # memory, which makes host-to-GPU transfers faster.
    pin_memory = True if use_gpu else False

    # DataLoader combines a dataset and a sampler and provides single- or
    # multi-process iteration over the dataset.
    trainloader = DataLoader(
        # VideoDataset: a video-based person re-ID dataset
        # (training split, sequence length, random sampling, with augmentation).
        VideoDataset(dataset.train, seq_len=args.seq_len, sample='random',
                     transform=transform_train),
        # Randomly sample N identities, then K instances per identity,
        # so the batch size is N * K.
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,  # training batch size
        num_workers=args.workers,  # number of worker processes
        pin_memory=pin_memory,
        # Set True to drop the last incomplete batch when the dataset size
        # is not divisible by the batch size.
        drop_last=True,
    )

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,  # set True to reshuffle the data every epoch (default: False)
        num_workers=args.workers,
        pin_memory=pin_memory,
        # With False, the last batch is simply smaller when the dataset size
        # is not divisible by the batch size.
        drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))  # Model initialization.
    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=args.width,
                                  sample_height=args.height,
                                  sample_duration=args.seq_len)
        # Fail loudly if the pretrained model is missing.
        if not os.path.exists(args.pretrained_model):
            raise IOError("Can't find pretrained model: {}".format(args.pretrained_model))
        # Load the pretrained weights.
        print("Loading checkpoint from '{}'".format(args.pretrained_model))
        checkpoint = torch.load(args.pretrained_model)
        state_dict = {}  # Build a state dict from the checkpoint parameters.
        for key in checkpoint['state_dict']:
            if 'fc' in key:
                continue
            state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        model = models.init_model(name=args.arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Losses: xent is softmax cross entropy (with label smoothing); htri is the triplet loss.
    criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    # Optimizer: Adam.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # stepsize enables step-wise learning-rate decay (> 0 means enabled).
    if args.stepsize > 0:
        # StepLR decays each parameter group's learning rate by gamma every step_size epochs.
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    start_epoch = args.start_epoch  # Manual epoch numbering (useful when restarting).

    if use_gpu:
        # Multi-GPU training: nn.DataParallel replicates the module across the
        # visible GPUs and splits each input batch among them.
        model = nn.DataParallel(model).cuda()

    # Note: evaluate alone is of limited use here; code should be added to load
    # a saved checkpoint before testing.
    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)  # Run evaluation.
        return

    start_time = time.time()  # Start time.
    best_rank1 = -np.inf  # Initialize the best rank-1 to negative infinity.
    if args.arch == 'resnet503d':  # For resnet503d, disable cudnn benchmarking.
        torch.backends.cudnn.benchmark = False

    for epoch in range(start_epoch, args.max_epoch):  # Train from start_epoch up to max_epoch.
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu)
        if args.stepsize > 0:
            scheduler.step()

        # Evaluate when eval_step > 0 and the current epoch is a multiple of
        # eval_step, or at the final epoch.
        if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1  # True if this rank-1 beats the best so far.
            if is_best:
                best_rank1 = rank1

            if use_gpu:
                # state_dict() returns a dict holding all module state, i.e. parameters and buffers.
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            # Save the checkpoint.
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    # Elapsed time: round() rounds the float to the nearest integer,
    # and timedelta renders the difference between two times as h:m:s.
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
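# The comments above describe CrossEntropyLabelSmooth as softmax cross entropy
# with label smoothing. A generic, self-contained sketch of that standard
# formula, q = (1 - eps) * onehot + eps / num_classes (not necessarily
# identical to the project's class):
import torch
import torch.nn as nn

class LabelSmoothXentSketch(nn.Module):
    def __init__(self, num_classes, epsilon=0.1):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, logits, targets):
        # logits: (N, num_classes); targets: (N,) integer class labels.
        log_probs = self.logsoftmax(logits)
        # Smooth the one-hot targets toward the uniform distribution.
        onehot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        q = (1 - self.epsilon) * onehot + self.epsilon / self.num_classes
        return (-q * log_probs).sum(dim=1).mean()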