comp_weights = weights reg_weights = weights rel_props = score_pickle_list[0][vid][0] return rel_props, \ merge_part(arrays, 1, act_weights), \ merge_part(arrays, 2, comp_weights), \ merge_part(arrays, 3, reg_weights) print('Merge detection scores from {} sources...'.format( len(score_pickle_list))) detection_scores = {k: merge_scores(k) for k in score_pickle_list[0]} print('Done.') dataset = SSNDataSet("", test_prop_file, verbose=False) dataset_detections = [dict() for i in range(num_class)] if args.cls_scores: print('Using classifier scores from {}'.format(args.cls_scores)) cls_score_pc = pickle.load(open(args.cls_scores, 'rb'), encoding='bytes') cls_score_dict = { os.path.splitext(os.path.basename(k.decode('utf-8')))[0]: v for k, v in cls_score_pc.items() } else: cls_score_dict = None # generate detection results def gen_detection_results(video_id, score_tp):
def main():
    """Train an SSN model, or only validate it, as directed by the CLI args.

    Steps, in order:

    1. Parse the command line into the module-level ``args`` and read the
       dataset configuration via ``get_configs``.
    2. Build the ``SSN`` model and initialise its base model from
       ``args.init_weights``, from the Kinetics-pretrained URL, or (for the
       ``Flow`` modality only) from the flow initialisation URL.
    3. Wrap the model in ``DataParallel`` and optionally restore a checkpoint
       from ``args.resume`` (start epoch, best loss and model state).
    4. Build the training and validation data loaders.
    5. If ``args.evaluate`` is set, run a single validation pass and return;
       otherwise train from ``args.start_epoch`` to ``args.epochs``,
       validating every ``args.eval_freq`` epochs (and on the last epoch) and
       saving a checkpoint after each validation.

    Side effects: rebinds the module globals ``args`` and ``best_loss``.

    Raises:
        ValueError: if ``args.modality`` is not one of ``RGB``, ``Flow`` or
            ``RGBDiff``.
    """
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)
    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    sampling_configs = dataset_configs['sampling']

    model = SSN(num_class,
                args.num_aug_segments,
                args.num_body_segments,
                args.num_aug_segments,
                args.modality,
                base_model=args.arch,
                dropout=args.dropout,
                stpp_cfg=stpp_configs,
                bn_mode=args.bn_mode)

    # Initialise the backbone. An explicit weights file takes precedence over
    # Kinetics pretraining, which takes precedence over the default init.
    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print("=> loading pretrained weigths '{}'".format(
                args.init_weights))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd['state_dict'])
            print("=> loaded init weights from '{}'".format(
                args.init_weights))
        else:
            print("=> no weights file found at '{}'".format(
                args.init_weights))
    elif args.kinetics_pretrain:
        model_url = dataset_configs['kinetics_pretrain'][args.arch][
            args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)['state_dict'])
        print("=> loaded init weights from '{}'".format(model_url))
    else:
        # standard ImageNet pretraining; only the Flow modality loads an
        # extra set of initial weights here
        if args.modality == 'Flow':
            model_url = dataset_configs['flow_init'][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)['state_dict'])
            print("=> loaded flow init weights from '{}'".format(model_url))

    # These are read from the bare model before it is wrapped below.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path that was actually loaded; the
            # previous message formatted the unrelated ``args.evaluate`` flag.
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['val_list'])

    # Shared by both datasets: the frame filename template and the tensor
    # layout flags that depend on the chosen architecture.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    inception_style = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            train_prop_file,
            epoch_multiplier=args.training_epoch_multiplier,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl=image_tmpl,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=inception_style),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    val_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            val_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl=image_tmpl,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=inception_style),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ]),
            # the validation set reuses the stats object of the training set
            reg_stats=train_loader.dataset.stats),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory)

    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, activity_criterion,
                 completeness_criterion, regression_criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion,
              completeness_criterion, regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember the lowest validation loss and save a checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'reg_stats': torch.from_numpy(train_loader.dataset.stats)
                },
                is_best,
                foldername=args.save_path,
                filename="checkpoint_{}.pth".format(epoch))
            print('======================================================')
            print(epoch, is_best, loss, best_loss)
            print('======================================================')
reg_weights = weights rel_props = score_pickle_list[0][vid][0] return ( rel_props, merge_part(arrays, 1, act_weights), merge_part(arrays, 2, comp_weights), merge_part(arrays, 3, reg_weights), ) print(("Merge detection scores from {} sources...".format(len(score_pickle_list)))) detection_scores = {k: merge_scores(k) for k in score_pickle_list[0]} print("Done.") dataset = SSNDataSet("", test_prop_file, verbose=False) dataset_detections = [dict() for i in range(num_class)] if args.cls_scores: print(("Using classifier scores from {}".format(args.cls_scores))) cls_score_pc = pickle.load(open(args.cls_scores, "rb"), encoding="bytes") cls_score_dict = { os.path.splitext(os.path.basename(k.decode("utf-8")))[0]: v for k, v in list(cls_score_pc.items()) } else: cls_score_dict = None # generate detection results
".".join(k.split(".")[1:]): v for k, v in list(checkpoint["state_dict"].items()) } stats = checkpoint["reg_stats"].numpy() dataset = SSNDataSet( "", test_prop_file, new_length=data_length, modality=args.modality, aug_seg=2, body_seg=5, image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_pref + "{}_{:05d}.jpg", test_mode=True, test_interval=args.frame_interval, transform=torchvision.transforms.Compose([ cropping, Stack(roll=(args.arch in ["BNInception", "InceptionV3"])), ToTorchFormatTensor( div=(args.arch not in ["BNInception", "InceptionV3"])), GroupNormalize(net.input_mean, net.input_std), ]), reg_stats=stats, verbose=False, ) index_queue = ctx.Queue() result_queue = ctx.Queue() workers = [ ctx.Process(
def main():
    """Entry point: fine-tune, or only validate, an SSN with a 'p3d' base model.

    The model is always built with ``base_model='p3d'`` and initialised from
    a fixed checkpoint file in the working directory, with the first dotted
    component stripped from every state-dict key. Training and validation
    loaders are then created (validation reads the configured ``test_list``).
    When ``args.evaluate`` is set only one validation pass runs; otherwise
    the model is trained from ``args.start_epoch`` to ``args.epochs`` with a
    validation pass and checkpoint every ``args.eval_freq`` epochs and on the
    final epoch.

    Side effects: rebinds the module globals ``args`` and ``best_loss``.

    Raises:
        ValueError: if ``args.modality`` is not ``RGB``, ``Flow`` or
            ``RGBDiff``.
    """
    global args, best_loss
    args = parser.parse_args()

    cfg = get_configs(args.dataset)
    num_class = cfg['num_class']
    stpp_configs = tuple(cfg['stpp'])
    # TODO
    sampling_configs = cfg['sampling']

    base_model = 'p3d'
    model = SSN(
        num_class,
        args.num_aug_segments,
        args.num_body_segments,
        args.num_aug_segments,
        args.modality,
        base_model=base_model,
        dropout=args.dropout,
        stpp_cfg=stpp_configs,
        bn_mode=args.bn_mode,
    )

    # Load the fixed checkpoint and drop the leading dotted component of each
    # parameter name before handing the state dict to the bare model.
    weights_file = 'ssn_activitynet1.2_BNInception_rgb_epoch-2_checkpoint.pth.tar'
    saved_state = torch.load(weights_file)['state_dict']
    stripped_state = {}
    for key, tensor in saved_state.items():
        stripped_state[key.partition('.')[2]] = tensor
    model.load_state_dict(stripped_state)

    # Pull everything needed from the bare model before wrapping it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    pin_memory = args.modality == 'RGB'

    # Data loading code
    if args.modality == 'RGBDiff':
        normalize = IdentityTransform()
    else:
        normalize = GroupNormalize(input_mean, input_std)

    frames_per_modality = {'RGB': 1, 'Flow': 5, 'RGBDiff': 5}
    if args.modality not in frames_per_modality:
        raise ValueError("unknown modality {}".format(args.modality))
    data_length = frames_per_modality[args.modality]

    proposal_tmpl = 'data/{}_proposal_list.txt'
    train_prop_file = proposal_tmpl.format(cfg['train_list'])
    val_prop_file = proposal_tmpl.format(cfg['test_list'])

    # Settings shared by the two datasets.
    if args.modality in ["RGB", "RGBDiff"]:
        frame_tmpl = "{:05d}.jpg"
    else:
        frame_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    inception_like = args.arch in ['BNInception', 'InceptionV3']

    train_transform = torchvision.transforms.Compose([
        train_augmentation,
        Stack(roll=inception_like),
        ToTorchFormatTensor(div=not inception_like),
        normalize,
    ])
    train_set = SSNDataSet(
        "",
        train_prop_file,
        epoch_multiplier=args.training_epoch_multiplier,
        new_length=data_length,
        modality=args.modality,
        exclude_empty=True,
        **sampling_configs,
        aug_seg=args.num_aug_segments,
        body_seg=args.num_body_segments,
        image_tmpl=frame_tmpl,
        transform=train_transform,
    )
    # in training we drop the last incomplete minibatch
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=inception_like),
        ToTorchFormatTensor(div=not inception_like),
        normalize,
    ])
    val_set = SSNDataSet(
        "",
        val_prop_file,
        new_length=data_length,
        modality=args.modality,
        exclude_empty=True,
        **sampling_configs,
        aug_seg=args.num_aug_segments,
        body_seg=args.num_body_segments,
        image_tmpl=frame_tmpl,
        random_shift=False,
        transform=val_transform,
        # validation reuses the stats object of the training dataset
        reg_stats=train_loader.dataset.stats,
    )
    val_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
    )

    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()
    criteria = (activity_criterion, completeness_criterion,
                regression_criterion)

    for group in policies:
        summary = 'group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])
        print(summary)

    optimizer = torch.optim.SGD(
        policies,
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    if args.evaluate:
        validate(val_loader, model, *criteria, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, *criteria, optimizer, epoch)

        # Skip validation unless this is an evaluation epoch or the last one.
        if (epoch + 1) % args.eval_freq != 0 and epoch != args.epochs - 1:
            continue

        # evaluate on validation set
        loss = validate(val_loader, model, *criteria,
                        (epoch + 1) * len(train_loader))

        # track the lowest validation loss seen so far and save a checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        snapshot = {
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_loss': best_loss,
            'reg_stats': torch.from_numpy(train_loader.dataset.stats),
        }
        save_checkpoint(snapshot, is_best)
arrays = [pc[vid] for pc in score_pickle_list] act_weights = weights comp_weights = weights reg_weights = weights rel_props = score_pickle_list[0][vid][0] return rel_props, \ merge_part(arrays, 1, act_weights), \ merge_part(arrays, 2, comp_weights), \ merge_part(arrays, 3, reg_weights) print('Merge detection scores from {} sources...'.format(len(score_pickle_list))) detection_scores = {k: merge_scores(k) for k in score_pickle_list[0]} print('Done.') dataset = SSNDataSet("", test_prop_file, verbose=False) dataset_detections = [dict() for i in range(num_class)] if args.cls_scores: print('Using classifier scores from {}'.format(args.cls_scores)) cls_score_pc = pickle.load(open(args.cls_scores, 'rb'), encoding='bytes') cls_score_dict = {os.path.splitext(os.path.basename(k.decode('utf-8')))[0]:v for k, v in cls_score_pc.items()} else: cls_score_dict = None # generate detection results def gen_detection_results(video_id, score_tp): if len(score_tp[0].shape) == 3: rel_prop = np.squeeze(score_tp[0], 0)