def get_args(num_classes):
    parser = arg_parser()
    args = parser.parse_args()
    args.datadir = 'data_inference'
    args.num_classes = num_classes
    args.batch_size = 1
    args.backbone_net = 'resnet'
    args.modality = 'rgb'
    args.dataset = 'test_dataset'
    args.pretrained = 'test_dataset-rgb-resnet-18-ts-max-f16-cosine-bs2-e100/model_best.pth.tar'
    return args
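# Hedged usage sketch (not part of the original script): how get_args() could drive a
# single inference pass. build_model, args.groups and args.input_size are assumed to
# follow the conventions of the surrounding repo; the class count is illustrative.
def run_inference_example():
    args = get_args(num_classes=174)
    model, arch_name = build_model(args, test_mode=True)
    checkpoint = torch.load(args.pretrained, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda().eval()
    # one dummy clip shaped like the stacked-frame input the models expect
    dummy_clip = torch.randn(1, 3 * args.groups, args.input_size, args.input_size).cuda()
    with torch.no_grad():
        logits = model(dummy_clip)
    print(logits.argmax(dim=1))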
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
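# Worked example of the world_size arithmetic above (illustrative numbers, not from the
# source): with 2 nodes of 4 GPUs each, every node is launched with world_size = 2;
# after the adjustment args.world_size becomes 4 * 2 = 8, one process per GPU overall.
# mp.spawn then passes each worker its local GPU index (0..ngpus_per_node-1) as the
# first argument, matching the (gpu, ngpus_per_node, args) call in the else branch.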
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True

    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, \
        image_tmpl, filter_video, label_file = get_dataset_config(args.dataset, args.use_lmdb)
    data_list_name = val_list_name if args.evaluate else test_list_name
    args.num_classes = num_classes
    if args.dataset == 'st2stv1':
        id_to_label, label_to_id = load_categories(os.path.join(args.datadir, label_file))

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args, test_mode=True)
    mean = model.mean(args.modality)
    std = model.std(args.modality)

    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError("When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError("When training with flow, dim of mean must be one.")
        mean = args.mean

    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError("When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError("When training with flow, dim of std must be one.")
        std = args.std

    model = model.cuda()
    model.eval()

    if args.threed_data:
        dummy_data = (args.input_channels, args.groups, args.input_size, args.input_size)
    else:
        dummy_data = (args.input_channels * args.groups, args.input_size, args.input_size)

    model_summary = torchsummary.summary(model, input_size=dummy_data)
    flops, params = extract_total_flops_params(model_summary)
    # reported FLOPs cover all clip/crop views evaluated per video
    flops = int(flops.replace(',', '')) * (args.num_clips * args.num_crops)

    model = torch.nn.DataParallel(model).cuda()
    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
        checkpoint = torch.load(args.pretrained)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        print("=> creating model '{}'".format(arch_name))

    # augmentor
    if args.disable_scaleup:
        scale_size = args.input_size
    else:
        scale_size = int(args.input_size / 0.875 + 0.5)

    augments = []
    if args.num_crops == 1:
        augments += [
            GroupScale(scale_size),
            GroupCenterCrop(args.input_size)
        ]
    else:
        flip = True if args.num_crops == 10 else False
        augments += [
            GroupOverSample(args.input_size, scale_size, num_crops=args.num_crops, flip=flip),
        ]
    augments += [
        Stack(threed_data=args.threed_data),
        ToTorchFormatTensor(num_clips_crops=args.num_clips * args.num_crops),
        GroupNormalize(mean=mean, std=std, threed_data=args.threed_data)
    ]

    augmentor = transforms.Compose(augments)

    # Data loading code
    data_list = os.path.join(args.datadir, data_list_name)
    # clip offsets centered on the video middle, e.g. num_clips=3 -> [-1, 0, 1]
    sample_offsets = list(range(-args.num_clips // 2 + 1, args.num_clips // 2 + 1))
    print("Image is scaled to {} and crop {}".format(scale_size, args.input_size))
    print("Number of crops: {}".format(args.num_crops))
    print("Number of clips: {}, offset from center with {}".format(args.num_clips, sample_offsets))

    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_dataset = video_data_cls(args.datadir, data_list, args.groups, args.frames_per_group,
                                 num_clips=args.num_clips, modality=args.modality,
                                 image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                                 fixed_offset=not args.random_sampling,
                                 transform=augmentor, is_train=False, test_mode=not args.evaluate,
                                 seperator=filename_seperator, filter_video=filter_video)

    data_loader = build_dataflow(val_dataset, is_train=False, batch_size=args.batch_size,
                                 workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    batch_time = AverageMeter()
    if args.evaluate:
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        top1 = AverageMeter()
        top5 = AverageMeter()
    else:
        logfile = open(os.path.join(log_folder,
                                    'test_{}crops_{}clips_{}.csv'.format(
                                        args.num_crops, args.num_clips, args.input_size)), 'w')

    total_outputs = 0
    outputs = np.zeros((len(data_loader) * args.batch_size, num_classes))

    # switch to evaluate mode
    model.eval()
    total_batches = len(data_loader)
    with torch.no_grad(), tqdm(total=total_batches) as t_bar:
        end = time.time()
        for i, (video, label) in enumerate(data_loader):
            output = eval_a_batch(video, model, args.input_channels, num_clips=args.num_clips,
                                  num_crops=args.num_crops, modality=args.modality,
                                  softmax=True, threed_data=args.threed_data)
            if args.evaluate:
                label = label.cuda(non_blocking=True)
                # measure accuracy
                prec1, prec5 = accuracy(output, label, topk=(1, 5))
                top1.update(prec1[0], video.size(0))
                top5.update(prec5[0], video.size(0))
                output = output.data.cpu().numpy().copy()
                batch_size = output.shape[0]
                outputs[total_outputs:total_outputs + batch_size, :] = output
            else:
                # testing, store output to prepare csv file
                output = output.data.cpu().numpy().copy()
                batch_size = output.shape[0]
                outputs[total_outputs:total_outputs + batch_size, :] = output
                predictions = np.argsort(output, axis=1)
                for ii in range(len(predictions)):
                    # top-5 class ids, highest score first
                    temp = predictions[ii][::-1][:5]
                    preds = [str(pred) for pred in temp]
                    if args.dataset == 'st2stv1':
                        print("{};{}".format(label[ii], id_to_label[int(preds[0])]), file=logfile)
                    else:
                        print("{};{}".format(label[ii], ";".join(preds)), file=logfile)
            total_outputs += video.shape[0]
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            t_bar.update(1)

    outputs = outputs[:total_outputs]
    print("Predict {} videos.".format(total_outputs), flush=True)
    np.save(os.path.join(log_folder, '{}_{}crops_{}clips_{}_details.npy'.format(
        "val" if args.evaluate else "test", args.num_crops, args.num_clips, args.input_size)),
        outputs)

    if args.evaluate:
        print('Val@{}({}) (# crops = {}, # clips = {}): \tTop@1: {:.4f}\tTop@5: {:.4f}\tFLOPs: {:,}\tParams: {}'.format(
            args.input_size, scale_size, args.num_crops, args.num_clips,
            top1.avg, top5.avg, flops, params), flush=True)
        print('Val@{}({}) (# crops = {}, # clips = {}): \tTop@1: {:.4f}\tTop@5: {:.4f}\tFLOPs: {:,}\tParams: {}'.format(
            args.input_size, scale_size, args.num_crops, args.num_clips,
            top1.avg, top5.avg, flops, params), flush=True, file=logfile)

    logfile.close()
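# Hedged sketch (not the repo's actual eval_a_batch): multi-crop / multi-clip testing of
# this kind typically folds the views into the batch dimension, runs one forward pass,
# and averages the softmax scores per video. Names and the exact layout are assumptions.
def average_clip_crop_scores(video, model, num_clips, num_crops):
    n, c_t, h, w = video.shape              # c_t stacks clips * crops along channels
    views = num_clips * num_crops
    video = video.view(n * views, c_t // views, h, w)
    scores = torch.softmax(model(video), dim=1)
    return scores.view(n, views, -1).mean(dim=1)  # one averaged score row per video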
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True

    num_classes, train_list_name, val_list_name, test_list_name, filename_seperator, \
        image_tmpl, filter_video, label_file = get_dataset_config(args.dataset, args.use_lmdb)
    args.num_classes = num_classes
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args)
    mean = model.mean(args.modality)
    std = model.std(args.modality)

    # overwrite mean and std if they are presented in command
    if args.mean is not None:
        if args.modality == 'rgb':
            if len(args.mean) != 3:
                raise ValueError("When training with rgb, dim of mean must be three.")
        elif args.modality == 'flow':
            if len(args.mean) != 1:
                raise ValueError("When training with flow, dim of mean must be one.")
        mean = args.mean

    if args.std is not None:
        if args.modality == 'rgb':
            if len(args.std) != 3:
                raise ValueError("When training with rgb, dim of std must be three.")
        elif args.modality == 'flow':
            if len(args.std) != 1:
                raise ValueError("When training with flow, dim of std must be one.")
        std = args.std

    model = model.cuda()
    model.eval()

    if args.threed_data:
        dummy_data = (3, args.groups, args.input_size, args.input_size)
    else:
        dummy_data = (3 * args.groups, args.input_size, args.input_size)

    model_summary = torchsummary.summary(model, input_size=dummy_data)
    torch.cuda.empty_cache()

    if args.show_model:
        print(model)
        print(model_summary)
        return 0

    model = torch.nn.DataParallel(model).cuda()

    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
        checkpoint = torch.load(args.pretrained, map_location='cpu')
        if args.transfer:
            new_dict = {}
            for k, v in checkpoint['state_dict'].items():
                # TODO: a better approach; for now, drop the classifier ("fc") weights so
                # they are re-initialized for the target dataset
                if k.replace("module.", "").startswith("fc"):
                    continue
                new_dict[k] = v
        else:
            new_dict = checkpoint['state_dict']
        model.load_state_dict(new_dict, strict=False)
    else:
        print("=> creating model '{}'".format(arch_name))

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()

    # Data loading code
    video_data_cls = VideoDataSetLMDB if args.use_lmdb else VideoDataSet
    val_list = os.path.join(args.datadir, val_list_name)
    val_augmentor = get_augmentor(False, args.input_size, mean, std, args.disable_scaleup,
                                  threed_data=args.threed_data, version=args.augmentor_ver,
                                  scale_range=args.scale_range)
    val_dataset = video_data_cls(args.datadir, val_list, args.groups, args.frames_per_group,
                                 num_clips=args.num_clips, modality=args.modality,
                                 image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                                 transform=val_augmentor, is_train=False, test_mode=False,
                                 seperator=filename_seperator, filter_video=filter_video)

    val_loader = build_dataflow(val_dataset, is_train=False, batch_size=args.batch_size,
                                workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    if args.evaluate:
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        flops, params = extract_total_flops_params(model_summary)
        print(model_summary)
        val_top1, val_top5, val_losses, val_speed = validate(val_loader, model, val_criterion)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'.format(
            args.input_size, val_losses, val_top1, val_top5, val_speed * 1000.0, flops, params),
            flush=True)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tFlops: {}\tParams: {}'.format(
            args.input_size, val_losses, val_top1, val_top5, val_speed * 1000.0, flops, params),
            flush=True, file=logfile)
        return

    train_list = os.path.join(args.datadir, train_list_name)
    train_augmentor = get_augmentor(True, args.input_size, mean, std,
                                    threed_data=args.threed_data, version=args.augmentor_ver,
                                    scale_range=args.scale_range)
    train_dataset = video_data_cls(args.datadir, train_list, args.groups, args.frames_per_group,
                                   num_clips=args.num_clips, modality=args.modality,
                                   image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                                   transform=train_augmentor, is_train=True, test_mode=False,
                                   seperator=filename_seperator, filter_video=filter_video)

    train_loader = build_dataflow(train_dataset, is_train=True, batch_size=args.batch_size,
                                  workers=args.workers)

    sgd_polices = model.parameters()
    optimizer = torch.optim.SGD(sgd_polices, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer, args.lr_steps, gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)

    best_top1 = 0.0
    tensorboard_logger.configure(log_folder)

    # optionally resume from a checkpoint
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            try:
                scheduler.load_state_dict(checkpoint['scheduler'])
            except KeyError:
                # older checkpoints may not carry a scheduler state
                pass
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError("Checkpoint is not found: {}".format(args.resume))
    else:
        if os.path.exists(os.path.join(log_folder, 'log.log')):
            shutil.copyfile(os.path.join(log_folder, 'log.log'),
                            os.path.join(log_folder, 'log.log.{}'.format(int(time.time()))))
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    command = " ".join(sys.argv)
    print(command, flush=True)
    print(args, flush=True)
    print(model, flush=True)
    print(model_summary, flush=True)
    print(command, file=logfile, flush=True)
    print(args, file=logfile, flush=True)
    if args.resume == '':
        print(model, file=logfile, flush=True)
        print(model_summary, flush=True, file=logfile)

    # placeholder so the first 'plateau' step does not fail before any validation ran
    val_losses = float('inf')
    for epoch in range(args.start_epoch, args.epochs):
        if args.lr_scheduler == 'plateau':
            scheduler.step(val_losses, epoch)
        else:
            scheduler.step(epoch)
        try:
            # read the current learning rate from the first param group;
            # assume all groups share the same lr
            lr = scheduler.optimizer.param_groups[0]['lr']
        except (AttributeError, IndexError, KeyError):
            lr = None

        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq, label_smoothing=args.label_smoothing,
                  clip_gradient=args.clip_gradient)
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, train_losses, train_top1, train_top5,
            train_speed * 1000.0, speed_data_loader * 1000.0), file=logfile, flush=True)
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, train_losses, train_top1, train_top5,
            train_speed * 1000.0, speed_data_loader * 1000.0), flush=True)

        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(val_loader, model, val_criterion)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5, val_speed * 1000.0),
            file=logfile, flush=True)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5, val_speed * 1000.0),
            flush=True)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)

        save_dict = {
            'epoch': epoch + 1,
            'arch': arch_name,
            'state_dict': model.state_dict(),
            'best_top1': best_top1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        save_checkpoint(save_dict, is_best, filepath=log_folder)

        if lr is not None:
            tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

    logfile.close()
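# For reference, the standard CosineAnnealingLR schedule selected above (with eta_min=0)
# follows lr(t) = 0.5 * args.lr * (1 + cos(pi * t / args.epochs)); for example,
# args.lr = 0.01 over 100 epochs gives 0.01 at epoch 0, 0.005 at epoch 50, and
# approaches 0 at the final epoch.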
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True

    if args.dataset == 'st2stv2':
        num_classes = 174
        train_list_name = 'training_256.txt'
        val_list_name = 'validation_256.txt'
        filename_seperator = " "
        image_tmpl = '{:05d}.jpg'
        filter_video = 3
    elif args.dataset == 'st2stv1':
        num_classes = 174
        train_list_name = 'training_256.txt'
        val_list_name = 'validation_256.txt'
        filename_seperator = " "
        image_tmpl = '{:05d}.jpg'
        filter_video = 3
    else:  # kinetics400
        num_classes = 400
        train_list_name = 'train_400_331.txt'
        val_list_name = 'val_400_331.txt'
        filename_seperator = ";"
        image_tmpl = '{:05d}.jpg'
        filter_video = 30
    # elif args.dataset == 'moments_30fps':
    #     num_classes = 339
    #     train_list_name = 'training_256.txt'
    #     val_list_name = 'validation_256.txt'
    #     filename_seperator = " "
    #     image_tmpl = '{:05d}.jpg'

    args.num_classes = num_classes
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.modality == 'rgb':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    elif args.modality == 'flow':
        mean = [0.5]
        std = [np.mean([0.229, 0.224, 0.225])]

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args)
    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
    else:
        print("=> creating model '{}'".format(arch_name))

    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()

    # Data loading code
    val_list = os.path.join(args.datadir, val_list_name)
    val_augmentor = get_augmentor(False, args.input_shape, mean=mean, std=std,
                                  disable_scaleup=args.disable_scaleup,
                                  is_flow=True if args.modality == 'flow' else False)
    val_dataset = VideoDataSet("", val_list, args.groups, args.frames_per_group,
                               num_clips=args.num_clips, modality=args.modality,
                               image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                               transform=val_augmentor, is_train=False, test_mode=False,
                               seperator=filename_seperator, filter_video=filter_video,
                               num_classes=args.num_classes)
    val_loader = build_dataflow(val_dataset, is_train=False, batch_size=args.batch_size,
                                workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    if args.evaluate:
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        val_top1, val_top5, val_losses, val_speed = validate(val_loader, model, val_criterion)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            args.input_shape, val_losses, val_top1, val_top5, val_speed * 1000.0), flush=True)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            args.input_shape, val_losses, val_top1, val_top5, val_speed * 1000.0),
            flush=True, file=logfile)
        return

    train_list = os.path.join(args.datadir, train_list_name)
    train_augmentor = get_augmentor(True, args.input_shape, mean=mean, std=std,
                                    disable_scaleup=args.disable_scaleup,
                                    is_flow=True if args.modality == 'flow' else False)
    train_dataset = VideoDataSet("", train_list, args.groups, args.frames_per_group,
                                 num_clips=args.num_clips, modality=args.modality,
                                 image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                                 transform=train_augmentor, is_train=True, test_mode=False,
                                 seperator=filename_seperator, filter_video=filter_video,
                                 num_classes=args.num_classes)
    train_loader = build_dataflow(train_dataset, is_train=True, batch_size=args.batch_size,
                                  workers=args.workers)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, args.lr_steps[0], gamma=0.1)
    elif args.lr_scheduler == 'multisteps':
        scheduler = lr_scheduler.MultiStepLR(optimizer, args.lr_steps, gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=0)
    elif args.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)

    best_top1 = 0.0
    tensorboard_logger.configure(log_folder)

    # optionally resume from a checkpoint
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.lr_scheduler == 'plateau':
                scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if os.path.exists(os.path.join(log_folder, 'log.log')):
            shutil.copyfile(os.path.join(log_folder, 'log.log'),
                            os.path.join(log_folder, 'log.log.{}'.format(int(time.time()))))
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    print(args, flush=True)
    print(model, flush=True)
    print(args, file=logfile, flush=True)
    if args.resume is None:
        print(model, file=logfile, flush=True)

    # placeholder so the first 'plateau' step does not fail before any validation ran
    val_losses = float('inf')
    for epoch in range(args.start_epoch, args.epochs):
        if args.lr_scheduler == 'plateau':
            scheduler.step(val_losses, epoch)
        else:
            scheduler.step(epoch)
        try:
            # read the current learning rate from the first param group;
            # assume all groups share the same lr
            lr = scheduler.optimizer.param_groups[0]['lr']
        except (AttributeError, IndexError, KeyError):
            lr = None

        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1,
                  display=args.print_freq)
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, train_losses, train_top1, train_top5,
            train_speed * 1000.0, speed_data_loader * 1000.0), file=logfile, flush=True)
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\tData loading: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, train_losses, train_top1, train_top5,
            train_speed * 1000.0, speed_data_loader * 1000.0), flush=True)

        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(val_loader, model, val_criterion)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5, val_speed * 1000.0),
            file=logfile, flush=True)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5, val_speed * 1000.0),
            flush=True)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)

        save_dict = {
            'epoch': epoch + 1,
            'arch': arch_name,
            'state_dict': model.state_dict(),
            'best_top1': best_top1,
            'optimizer': optimizer.state_dict(),
        }
        if args.lr_scheduler == 'plateau':
            save_dict['scheduler'] = scheduler.state_dict()
        save_checkpoint(save_dict, is_best, filepath=log_folder)

        if lr is not None:
            tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

    logfile.close()
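# Hedged sketch of save_checkpoint (defined elsewhere in the repo): the conventional
# implementation writes the latest state and copies it to model_best.pth.tar when
# is_best is set, which matches both the resume path above and the
# 'model_best.pth.tar' filename referenced by get_args() earlier.
def save_checkpoint_sketch(state, is_best, filepath=''):
    ckpt = os.path.join(filepath, 'checkpoint.pth.tar')
    torch.save(state, ckpt)
    if is_best:
        shutil.copyfile(ckpt, os.path.join(filepath, 'model_best.pth.tar'))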
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torch.backends.cudnn as cudnn
import os
import numpy as np
from dataset import ucf101_val3d
from dataset import hmdb51_val3d
from get_model import get_model
import sklearn.metrics
import time
from opts import arg_parser

parser = arg_parser()
args = parser.parse_args()

best_prec1 = 0
torch.backends.cudnn.benchmark = True
ckpt_path = '/4T/zhujian/ckpt'


def main():
    global best_prec1
    batch_size = args.batch_size
    lr = args.learning_rate
    epochs = args.epochs
    val_freq = args.val_freq
    num_frames = args.num_frames
    num_workers = args.num_workers
    sample_clips = args.sample_clips
def main():
    global args
    parser = arg_parser()
    args = parser.parse_args()
    cudnn.benchmark = True

    id_to_label = {}
    if args.dataset == 'st2stv2':
        num_classes = 174
        data_list_name = 'validation_256.txt' if args.evaluate else 'testing_256.txt'
        filename_seperator = " "
        image_tmpl = '{:05d}.jpg'
        filter_video = 3
    elif args.dataset == 'st2stv1':
        num_classes = 174
        data_list_name = 'validation_256.txt' if args.evaluate else 'testing_256.txt'
        filename_seperator = " "
        image_tmpl = '{:05d}.jpg'
        label_file = 'something-something-v1-labels.csv'
        filter_video = 3
        id_to_label, label_to_id = load_categories(os.path.join(args.datadir, label_file))
    else:  # 'kinetics400'
        num_classes = 400
        data_list_name = 'val_400_331.txt' if args.evaluate else 'test_400_331.txt'
        filename_seperator = ";"
        image_tmpl = '{:05d}.jpg'
        filter_video = 30

    args.num_classes = num_classes
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.modality == 'rgb':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:  # flow
        mean = [0.5]
        std = [np.mean([0.229, 0.224, 0.225])]

    if args.modality == 'rgb':
        args.input_channels = 3
    elif args.modality == 'flow':
        args.input_channels = 2 * 5

    model, arch_name = build_model(args, test_mode=True)
    if args.pretrained is not None:
        print("=> using pre-trained model '{}'".format(arch_name))
    else:
        print("=> creating model '{}'".format(arch_name))

    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()

    # augmentor
    if args.disable_scaleup:
        scale_size = args.input_shape
    else:
        scale_size = int(args.input_shape / 0.875 + 0.5)

    augments = []
    if args.num_crops == 1:
        augments += [
            GroupScale(scale_size),
            GroupCenterCrop(args.input_shape)
        ]
    else:
        flip = True if args.num_crops == 10 else False
        augments += [
            GroupOverSample(args.input_shape, scale_size, num_crops=args.num_crops, flip=flip),
        ]
    augments += [
        Stack(),
        ToTorchFormatTensor(num_clips_crops=args.num_clips * args.num_crops),
        GroupNormalize(mean=mean, std=std)
    ]

    augmentor = transforms.Compose(augments)

    # Data loading code
    data_list = os.path.join(args.datadir, data_list_name)
    # clip offsets centered on the video middle, e.g. num_clips=3 -> [-1, 0, 1]
    sample_offsets = list(range(-args.num_clips // 2 + 1, args.num_clips // 2 + 1))
    print("Image is scaled to {} and crop {}".format(scale_size, args.input_shape))
    print("Number of crops: {}".format(args.num_crops))
    print("Number of clips: {}, offset from center with {}".format(args.num_clips, sample_offsets))

    val_dataset = VideoDataSet("", data_list, args.groups, args.frames_per_group,
                               num_clips=args.num_clips, modality=args.modality,
                               image_tmpl=image_tmpl, dense_sampling=args.dense_sampling,
                               fixed_offset=not args.random_sampling,
                               transform=augmentor, is_train=False, test_mode=not args.evaluate,
                               seperator=filename_seperator, filter_video=filter_video)

    data_loader = build_dataflow(val_dataset, is_train=False, batch_size=args.batch_size,
                                 workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    batch_time = AverageMeter()
    if args.evaluate:
        logfile = open(os.path.join(log_folder, 'evaluate_log.log'), 'a')
        top1 = AverageMeter()
        top5 = AverageMeter()
    else:
        logfile = open(os.path.join(log_folder,
                                    'test_{}crops_{}clips_{}.csv'.format(
                                        args.num_crops, args.num_clips, args.input_shape)), 'w')

    total_outputs = 0
    outputs = np.zeros((len(data_loader) * args.batch_size, num_classes))

    # switch to evaluate mode
    model.eval()
    total_batches = len(data_loader)
    with torch.no_grad(), tqdm(total=total_batches) as t_bar:
        end = time.time()
        for i, (video, label) in enumerate(data_loader):
            output = eval_a_batch(video, model, num_clips=args.num_clips,
                                  num_crops=args.num_crops, softmax=True)
            if args.evaluate:
                label = label.cuda(non_blocking=True)
                # measure accuracy
                prec1, prec5 = accuracy(output, label, topk=(1, 5))
                top1.update(prec1[0], video.size(0))
                top5.update(prec5[0], video.size(0))
                output = output.data.cpu().numpy().copy()
                batch_size = output.shape[0]
                outputs[total_outputs:total_outputs + batch_size, :] = output
            else:
                # testing, store output to prepare csv file
                output = output.data.cpu().numpy().copy()
                batch_size = output.shape[0]
                outputs[total_outputs:total_outputs + batch_size, :] = output
                predictions = np.argsort(output, axis=1)
                for ii in range(len(predictions)):
                    # top-5 class ids, highest score first
                    temp = predictions[ii][::-1][:5]
                    preds = [str(pred) for pred in temp]
                    if args.dataset == 'st2stv1':
                        print("{};{}".format(label[ii], id_to_label[int(preds[0])]), file=logfile)
                    else:
                        print("{};{}".format(label[ii], ";".join(preds)), file=logfile)
            total_outputs += video.shape[0]
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            t_bar.update(1)

    outputs = outputs[:total_outputs]
    print("Predict {} videos.".format(total_outputs), flush=True)
    np.save(os.path.join(log_folder, '{}_{}crops_{}clips_{}_details.npy'.format(
        "val" if args.evaluate else "test", args.num_crops, args.num_clips, args.input_shape)),
        outputs)

    if args.evaluate:
        print('Val@{}({}) (# crops = {}, # clips = {}): \tTop@1: {:.4f}\tTop@5: {:.4f}'.format(
            args.input_shape, scale_size, args.num_crops, args.num_clips,
            top1.avg, top5.avg), flush=True)
        print('Val@{}({}) (# crops = {}, # clips = {}): \tTop@1: {:.4f}\tTop@5: {:.4f}'.format(
            args.input_shape, scale_size, args.num_crops, args.num_clips,
            top1.avg, top5.avg), flush=True, file=logfile)

    logfile.close()
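# Illustrative invocation of this test script (script name and flag spellings assumed
# from the argument usage above, not verified against opts.py):
#   python test.py --dataset kinetics400 --modality rgb --datadir /path/to/data \
#       --num_crops 3 --num_clips 10 --evaluate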