def main():
    args = parse_args('mfnet', val=True)
    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D_MO
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    validate = validate_mfnet_mo_json

    kwargs = {}
    num_coords = 0
    if args.use_hands:
        num_coords += 2
    kwargs['num_coords'] = num_coords

    model_ft = mfnet_3d(num_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
    if args.old_mfnet_eval:
        # remap the single-classifier weights of older checkpoints onto the
        # multi-output classifier list
        checkpoint['state_dict']['module.classifier_list.classifier_list.0.weight'] = \
            checkpoint['state_dict']['module.classifier.weight']
        checkpoint['state_dict']['module.classifier_list.classifier_list.0.bias'] = \
            checkpoint['state_dict']['module.classifier.bias']
    model_ft.load_state_dict(checkpoint['state_dict'], strict=False)
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    num_valid_classes = len([cls for cls in num_classes if cls > 0])
    valid_classes = [cls for cls in num_classes if cls > 0]
    crop_type = CenterCrop((224, 224))
    val_sampler = MiddleSampling(num=args.clip_length, window=64)
    val_transforms = transforms.Compose([Resize((256, 256), False), crop_type,
                                         ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    val_loader = VideoAndPointDatasetLoader(val_sampler, args.val_list,
                                            point_list_prefix=args.bpv_prefix,
                                            num_classes=num_classes,
                                            img_tmpl='frame_{:010d}.jpg',
                                            norm_val=[456., 256., 456., 256.],
                                            batch_transform=val_transforms,
                                            use_hands=False, validation=True)
    val_iter = torch.utils.data.DataLoader(val_loader, batch_size=args.batch_size,
                                           shuffle=False, num_workers=args.num_workers,
                                           pin_memory=True)

    outputs = validate(model_ft, val_iter, num_valid_classes,
                       args.val_list.split("\\")[-1], action_file=args.epic_actions_path)

    # EPIC test splits: s1 = seen kitchens, s2 = unseen kitchens
    eval_mode = 'seen' if 's1' in args.val_list else 'unseen' if 's2' in args.val_list else 'unknown'
    json_file = "{}.json".format(os.path.join(output_dir, eval_mode))
    with open(json_file, 'w') as jf:
        json.dump(outputs, jf)
def test_cnn(model, criterion, test_iterator, cur_epoch, dataset, log_file, gpus):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    with torch.no_grad():
        model.eval()
        print_and_save('Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset), log_file)
        for batch_idx, (inputs, targets) in enumerate(test_iterator):
            inputs = inputs.cuda(gpus[0])
            targets = targets.cuda(gpus[0])

            output = model(inputs)
            loss = criterion(output, targets)

            t1, t5 = accuracy(output.detach(), targets.detach(), topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save('[Epoch:{}, Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]'
                           .format(cur_epoch, batch_idx, len(test_iterator),
                                   top1.val, top1.avg, top5.val, top5.avg), log_file)
    print_and_save('{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'
                   .format(dataset, losses.avg, top1.avg, top5.avg), log_file)
    return top1.avg
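# AverageMeter and accuracy come from the repo's utils, which are not shown in
# this section. For reference, a minimal sketch of both, assuming the common
# pattern from the official PyTorch ImageNet example (the repo's own versions
# may differ in detail):

class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average of a metric."""
    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)  # top-k class indices per sample
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res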
def test_cnn_do(model, criterion, test_iterator, cur_epoch, dataset, log_file, gpus):
    losses, losses_a, losses_b = AverageMeter(), AverageMeter(), AverageMeter()
    top1_a, top5_a, top1_b, top5_b = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    with torch.no_grad():
        model.eval()
        print_and_save('Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset), log_file)
        for batch_idx, (inputs, targets) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda(gpus[0])
            targets_a = torch.tensor(targets[0]).cuda(gpus[0])
            targets_b = torch.tensor(targets[1]).cuda(gpus[0])

            output_a, output_b = model(inputs)
            loss_a = criterion(output_a, targets_a)
            loss_b = criterion(output_b, targets_b)
            loss = 0.75 * loss_a + 0.25 * loss_b

            t1_a, t5_a = accuracy(output_a.detach().cpu(), targets_a.detach().cpu(), topk=(1, 5))
            t1_b, t5_b = accuracy(output_b.detach().cpu(), targets_b.detach().cpu(), topk=(1, 5))
            top1_a.update(t1_a.item(), output_a.size(0))
            top5_a.update(t5_a.item(), output_a.size(0))
            top1_b.update(t1_b.item(), output_b.size(0))
            top5_b.update(t5_b.item(), output_b.size(0))
            losses_a.update(loss_a.item(), output_a.size(0))
            losses_b.update(loss_b.item(), output_b.size(0))
            losses.update(loss.item(), output_a.size(0))

            to_print = '[Epoch:{}, Batch {}/{}]' \
                       '[Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                       'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]]'.format(
                           cur_epoch, batch_idx, len(test_iterator),
                           top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                           top1_b.val, top1_b.avg, top5_b.val, top5_b.avg)
            print_and_save(to_print, log_file)
    print_and_save('{} Results: Loss {:.3f}, Top1_a {:.3f}, Top5_a {:.3f}, Top1_b {:.3f}, Top5_b {:.3f}'
                   .format(dataset, losses.avg, top1_a.avg, top5_a.avg, top1_b.avg, top5_b.avg),
                   log_file)
    return top1_a.avg, top1_b.avg
def validate_lstm(model, criterion, test_iterator, cur_epoch, dataset, log_file, args):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    outputs = []
    print_and_save('Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset), log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets, video_names) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda()
            targets = torch.tensor(targets).cuda()
            inputs = inputs.transpose(1, 0)  # (batch, seq, feat) -> (seq, batch, feat)

            output = model(inputs, seq_lengths)
            loss = criterion(output, targets)

            batch_preds = []
            for j in range(output.size(0)):
                res = np.argmax(output[j].detach().cpu().numpy())
                label = targets[j].cpu().numpy()
                outputs.append([res, label])
                batch_preds.append("{}, P-L:{}-{}".format(video_names[j], res, label))

            t1, t5 = accuracy(output.detach().cpu(), targets.detach().cpu(), topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save('[Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]\n\t{}'
                           .format(batch_idx, len(test_iterator), top1.val, top1.avg,
                                   top5.val, top5.avg, batch_preds), log_file)
    print_and_save('{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'
                   .format(dataset, losses.avg, top1.avg, top5.avg), log_file)
    return top1.avg, outputs
log_file = None
no_norm_input = False
ckpt_path = r"outputs\lstm_polar_128_0.0_1000_8_32_2_seq32_coords_polar_clr_tri_vsel125\lstm_polar_128_0.0_1000_8_32_2_seq32_coords_polar_clr_tri_vsel125_best.pth"
val_list = r"splits\hand_tracks\hand_locs_val_1.txt"
lstm_input, lstm_hidden, lstm_layers, verb_classes, lstm_seq_size = 8, 32, 2, 125, 32

lstm_model = LSTM_Hands_Polar
kwargs = {'dropout': 0, 'bidir': True}
model_ft = lstm_model(lstm_input, lstm_hidden, lstm_layers, verb_classes, **kwargs)
model_ft = torch.nn.DataParallel(model_ft).cuda()
checkpoint = torch.load(ckpt_path)
model_ft.load_state_dict(checkpoint['state_dict'])
model_ft.eval()
print_and_save("Model loaded to gpu", log_file)
criterion = torch.nn.CrossEntropyLoss().cuda()

#%%
norm_val = [1., 1., 1., 1.] if no_norm_input else [456., 256., 456., 256.]
norm_val = np.array(norm_val)
# dataset_loader = PointPolarDatasetLoader(val_list, max_seq_length=lstm_seq_size,
#                                          norm_val=norm_val, validation=True)

# track_path = r"D:\Code\epic-kitchens-processing\output\yolo_allhands_tracked_videos\clean\P01\P01_04.pkl"
# track_path = r"D:\Datasets\egocentric\EPIC_KITCHENS\clean_hand_detection_tracks\P30\P30_05\179200_5_140.pkl"
track_path = r"D:\Datasets\egocentric\EPIC_KITCHENS\clean_hand_detection_tracks\P30\P30_05\81347_4_11.pkl"
hand_tracks = load_pickle(track_path)
left_track = np.array(hand_tracks['left'], dtype=np.float32)
right_track = np.array(hand_tracks['right'], dtype=np.float32)
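# What follows is a hypothetical single-clip inference sketch: the real
# feature construction lives in PointPolarDatasetLoader, which is not shown
# here. Assuming lstm_input=8 corresponds to per-frame (x, y, r, phi) for
# each of the two hands, with (r, phi) the polar form of the normalized
# coordinates, one forward pass could look like this:

def polar_features(track, norm_val_xy):
    # normalize (x, y) by the frame size, then derive the polar form (r, phi)
    xy = track / norm_val_xy
    r = np.sqrt(xy[:, 0] ** 2 + xy[:, 1] ** 2)
    phi = np.arctan2(xy[:, 1], xy[:, 0])
    return np.stack([xy[:, 0], xy[:, 1], r, phi], axis=1).astype(np.float32)

feats = np.concatenate([polar_features(left_track, norm_val[:2]),
                        polar_features(right_track, norm_val[2:])], axis=1)
feats = feats[:lstm_seq_size]  # clip to the trained sequence size
seq_len = [feats.shape[0]]
clip = torch.from_numpy(feats).unsqueeze(1).cuda()  # (seq, batch=1, feat)
with torch.no_grad():
    logits = model_ft(clip, seq_len)
    pred = torch.argmax(logits, dim=1).item()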
def main():
    args, model_name = parse_args('lstm', val=False)
    output_dir, log_file = init_folders(args.base_output_dir, model_name, args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    lstm_model = LSTM_per_hand if args.lstm_dual else LSTM_Hands_attn if args.lstm_attn else LSTM_Hands
    kwargs = {'dropout': args.dropout, 'bidir': args.lstm_bidir,
              'noun_classes': args.noun_classes, 'double_output': args.double_output}
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    # model_ft = LSTM_Hands_encdec(456, 64, 32, args.lstm_layers, verb_classes, 0)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    print_and_save("Model loaded to gpu", log_file)
    if args.resume:
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir, model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path), log_file)

    if args.only_left and args.only_right:
        sys.exit("At most one of *only_left* and *only_right* can be True at any time.")
    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]

    if args.lstm_feature == "coords" or args.lstm_feature == "coords_dual":
        if args.lstm_clamped and (not args.lstm_dual or args.lstm_seq_size == 0):
            sys.exit("Clamped tracks require dual lstms and a fixed lstm sequence size.")
        train_loader = PointDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size,
                                          num_classes=args.verb_classes, norm_val=norm_val,
                                          dual=args.lstm_dual, clamp=args.lstm_clamped,
                                          only_left=args.only_left, only_right=args.only_right)
        test_loader = PointDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size,
                                         num_classes=args.verb_classes, norm_val=norm_val,
                                         dual=args.lstm_dual, clamp=args.lstm_clamped,
                                         only_left=args.only_left, only_right=args.only_right)
    elif args.lstm_feature == "vec_sum" or args.lstm_feature == "vec_sum_dual":
        train_loader = PointVectorSummedDatasetLoader(args.train_list,
                                                      max_seq_length=args.lstm_seq_size,
                                                      num_classes=args.verb_classes,
                                                      dual=args.lstm_dual)
        test_loader = PointVectorSummedDatasetLoader(args.test_list,
                                                     max_seq_length=args.lstm_seq_size,
                                                     num_classes=args.verb_classes,
                                                     dual=args.lstm_dual)
    elif args.lstm_feature == "coords_bpv":
        # if args.num_workers > 0:
        #     from utils.dataset_loader import make_data_arr, parse_samples_list
        #     data_arr_train = make_data_arr(parse_samples_list(args.train_list), args.bpv_prefix)
        #     data_arr_test = make_data_arr(parse_samples_list(args.test_list), args.bpv_prefix)
        #     import multiprocessing
        #     manager_train = multiprocessing.Manager()
        #     data_train = manager_train.list(data_arr_train)
        #     manager_test = multiprocessing.Manager()
        #     data_test = manager_test.list(data_arr_test)
        #     train_loader = PointBpvDatasetLoader(args.train_list, args.lstm_seq_size,
        #                                          args.double_output, norm_val=norm_val,
        #                                          bpv_prefix=args.bpv_prefix, data_arr=data_train)
        #     test_loader = PointBpvDatasetLoader(args.test_list, args.lstm_seq_size,
        #                                         args.double_output, norm_val=norm_val,
        #                                         bpv_prefix=args.bpv_prefix, data_arr=data_test)
        # else:
        train_loader = PointBpvDatasetLoader(args.train_list, args.lstm_seq_size,
                                             args.double_output, norm_val=norm_val,
                                             bpv_prefix=args.bpv_prefix,
                                             num_workers=args.num_workers)
        test_loader = PointBpvDatasetLoader(args.test_list, args.lstm_seq_size,
                                            args.double_output, norm_val=norm_val,
                                            bpv_prefix=args.bpv_prefix,
                                            num_workers=args.num_workers)
    elif args.lstm_feature == "coords_objects":
        train_loader = PointObjDatasetLoader(args.train_list, args.lstm_seq_size,
                                             args.double_output, norm_val=norm_val,
                                             bpv_prefix=args.bpv_prefix)
        test_loader = PointObjDatasetLoader(args.test_list, args.lstm_seq_size,
                                            args.double_output, norm_val=norm_val,
                                            bpv_prefix=args.bpv_prefix)
    else:
        sys.exit("Unsupported lstm feature")
    # train_loader = PointImageDatasetLoader(train_list, norm_val=norm_val)
    # test_loader = PointImageDatasetLoader(test_list, norm_val=norm_val)

    train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=args.batch_size,
                                                 shuffle=True, num_workers=args.num_workers,
                                                 pin_memory=True, collate_fn=lstm_collate)
    test_iterator = torch.utils.data.DataLoader(test_loader, batch_size=args.batch_size,
                                                shuffle=False, num_workers=args.num_workers,
                                                pin_memory=True, collate_fn=lstm_collate)

    params_to_update = model_ft.parameters()
    print_and_save("Params to learn:", log_file)
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print_and_save("\t{}".format(name), log_file)

    optimizer = torch.optim.SGD(params_to_update, lr=args.lr,
                                momentum=args.momentum, weight_decay=args.decay)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer, len(train_iterator))

    train_fun, test_fun = (train_attn_lstm, test_attn_lstm) if args.lstm_attn else \
                          (train_lstm_do, test_lstm_do) if args.double_output else \
                          (train_lstm, test_lstm)

    if not args.double_output:
        new_top1, top1 = 0.0, 0.0
    else:
        new_top1, top1 = (0.0, 0.0), (0.0, 0.0)
    for epoch in range(args.max_epochs):
        train_fun(model_ft, optimizer, ce_loss, train_iterator, epoch, log_file, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test_fun(model_ft, ce_loss, train_iterator, epoch, "Train", log_file)
            new_top1 = test_fun(model_ft, ce_loss, test_iterator, epoch, "Test", log_file)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1, args.save_all_weights,
                                    output_dir, model_name, epoch, log_file)
def validate_lstm_do(model, criterion, test_iterator, cur_epoch, dataset, log_file, args):
    losses, losses_a, losses_b = AverageMeter(), AverageMeter(), AverageMeter()
    top1_a, top5_a, top1_b, top5_b = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    outputs_a, outputs_b = [], []
    print_and_save('Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset), log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets, video_names) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda()
            inputs = inputs.transpose(1, 0)
            output_a, output_b = model(inputs, seq_lengths)
            targets_a = torch.tensor(targets[:, 0]).cuda()
            targets_b = torch.tensor(targets[:, 1]).cuda()

            loss_a = criterion(output_a, targets_a)
            loss_b = criterion(output_b, targets_b)
            loss = 0.75 * loss_a + 0.25 * loss_b

            batch_preds = []
            for j in range(output_a.size(0)):
                res_a = np.argmax(output_a[j].detach().cpu().numpy())
                res_b = np.argmax(output_b[j].detach().cpu().numpy())
                label_a = targets_a[j].cpu().numpy()
                label_b = targets_b[j].cpu().numpy()
                outputs_a.append([res_a, label_a])
                outputs_b.append([res_b, label_b])
                batch_preds.append("{}, a P-L:{}-{}, b P-L:{}-{}".format(
                    video_names[j], res_a, label_a, res_b, label_b))

            t1_a, t5_a = accuracy(output_a.detach().cpu(), targets_a.detach().cpu(), topk=(1, 5))
            t1_b, t5_b = accuracy(output_b.detach().cpu(), targets_b.detach().cpu(), topk=(1, 5))
            top1_a.update(t1_a.item(), output_a.size(0))
            top5_a.update(t5_a.item(), output_a.size(0))
            top1_b.update(t1_b.item(), output_b.size(0))
            top5_b.update(t5_b.item(), output_b.size(0))
            losses_a.update(loss_a.item(), output_a.size(0))
            losses_b.update(loss_b.item(), output_b.size(0))
            losses.update(loss.item(), output_a.size(0))

            to_print = '[Batch {}/{}]' \
                       '[Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                       'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]]\n\t{}'.format(
                           batch_idx, len(test_iterator),
                           top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                           top1_b.val, top1_b.avg, top5_b.val, top5_b.avg, batch_preds)
            print_and_save(to_print, log_file)
    print_and_save('{} Results: Loss {:.3f}, Top1_a {:.3f}, Top5_a {:.3f}, Top1_b {:.3f}, Top5_b {:.3f}'
                   .format(dataset, losses.avg, top1_a.avg, top5_a.avg, top1_b.avg, top5_b.avg),
                   log_file)
    return (top1_a.avg, top1_b.avg), (outputs_a, outputs_b)
def main():
    args = parse_args('lstm', val=True)
    output_dir = os.path.dirname(args.ckpt_path)
    log_file = os.path.join(output_dir, "results-accuracy-validation.txt") if args.logging else None
    if args.double_output and args.logging:
        if 'verb' in args.ckpt_path:
            log_file = os.path.join(output_dir, "results-accuracy-validation-verb.txt")
        if 'noun' in args.ckpt_path:
            log_file = os.path.join(output_dir, "results-accuracy-validation-noun.txt")
    print_and_save(args, log_file)
    cudnn.benchmark = True

    lstm_model = LSTM_per_hand if args.lstm_dual else LSTM_Hands_attn if args.lstm_attn else LSTM_Hands
    kwargs = {'dropout': 0, 'bidir': args.lstm_bidir,
              'noun_classes': args.noun_classes, 'double_output': args.double_output}
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path)
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded to gpu", log_file)

    if args.only_left and args.only_right:
        sys.exit("At most one of *only_left* and *only_right* can be True at any time.")
    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]

    if args.lstm_feature == "coords" or args.lstm_feature == "coords_dual":
        if args.lstm_clamped and (not args.lstm_dual or args.lstm_seq_size == 0):
            sys.exit("Clamped tracks require dual lstms and a fixed lstm sequence size.")
        dataset_loader = PointDatasetLoader(args.val_list, max_seq_length=args.lstm_seq_size,
                                            num_classes=args.verb_classes, norm_val=norm_val,
                                            dual=args.lstm_dual, clamp=args.lstm_clamped,
                                            only_left=args.only_left, only_right=args.only_right,
                                            validation=True)
    elif args.lstm_feature == "vec_sum" or args.lstm_feature == "vec_sum_dual":
        dataset_loader = PointVectorSummedDatasetLoader(args.val_list,
                                                        max_seq_length=args.lstm_seq_size,
                                                        num_classes=args.verb_classes,
                                                        dual=args.lstm_dual, validation=True)
    elif args.lstm_feature == "coords_bpv":
        dataset_loader = PointBpvDatasetLoader(args.val_list, args.lstm_seq_size,
                                               args.double_output, norm_val=norm_val,
                                               bpv_prefix=args.bpv_prefix, validation=True)
    elif args.lstm_feature == "coords_objects":
        dataset_loader = PointObjDatasetLoader(args.val_list, args.lstm_seq_size,
                                               args.double_output, norm_val=norm_val,
                                               bpv_prefix=args.bpv_prefix, validation=True)
    else:
        sys.exit("Unsupported lstm feature")

    collate_fn = lstm_collate
    # collate_fn = torch.utils.data.dataloader.default_collate
    dataset_iterator = torch.utils.data.DataLoader(dataset_loader, batch_size=args.batch_size,
                                                   num_workers=args.num_workers,
                                                   collate_fn=collate_fn, pin_memory=True)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    validate = validate_lstm_attn if args.lstm_attn else validate_lstm_do if args.double_output else validate_lstm
    top1, outputs = validate(model_ft, ce_loss, dataset_iterator, checkpoint['epoch'],
                             args.val_list.split("\\")[-1], log_file, args)

    if not isinstance(top1, tuple):
        video_preds = [x[0] for x in outputs]
        video_labels = [x[1] for x in outputs]
        mean_cls_acc, top1_acc = eval_final_print(video_preds, video_labels, "Verbs",
                                                  args.annotations_path, args.val_list,
                                                  args.verb_classes, log_file)
    else:
        video_preds_a, video_preds_b = [x[0] for x in outputs[0]], [x[0] for x in outputs[1]]
        video_labels_a, video_labels_b = [x[1] for x in outputs[0]], [x[1] for x in outputs[1]]
        mean_cls_acc_a, top1_acc_a = eval_final_print(video_preds_a, video_labels_a, "Verbs",
                                                      args.annotations_path, args.val_list,
                                                      args.verb_classes, log_file)
        mean_cls_acc_b, top1_acc_b = eval_final_print(video_preds_b, video_labels_b, "Nouns",
                                                      args.annotations_path, args.val_list,
                                                      args.noun_classes, log_file)
def train_cnn(model, optimizer, criterion, train_iterator, mixup_alpha, cur_epoch,
              log_file, gpus, lr_scheduler=None):
    batch_time, losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    if lr_scheduler is not None and not isinstance(lr_scheduler, CyclicLR):
        # epoch-based schedulers step once per epoch; CyclicLR steps per batch below
        lr_scheduler.step()

    print_and_save('*********', log_file)
    print_and_save('Beginning of epoch: {}'.format(cur_epoch), log_file)
    t0 = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_iterator):
        if isinstance(lr_scheduler, CyclicLR):
            lr_scheduler.step()
        inputs = inputs.cuda(gpus[0])
        targets = targets.cuda(gpus[0])

        # TODO: Fix mixup and cuda integration, especially for mfnet
        if mixup_alpha != 1:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, mixup_alpha)

        output = model(inputs)
        if mixup_alpha != 1:
            loss = mixup_criterion(criterion, output, targets_a, targets_b, lam)
        else:
            loss = criterion(output, targets)

        optimizer.zero_grad()
        loss.backward()
        # if clip_gradient is not None:
        #     total_norm = torch.nn.clip_grad_norm_(model.parameters(), clip_gradient)
        #     if total_norm > clip_gradient:
        #         to_print = "clipping gradient: {} with coef {}".format(total_norm, clip_gradient / total_norm)
        #         print_and_save(to_print, log_file)
        optimizer.step()

        t1, t5 = accuracy(output.detach(), targets.detach(), topk=(1, 5))
        top1.update(t1.item(), output.size(0))
        top5.update(t5.item(), output.size(0))
        losses.update(loss.item(), output.size(0))
        batch_time.update(time.time() - t0)
        t0 = time.time()

        print_and_save('[Epoch:{}, Batch {}/{} in {:.3f} s]'
                       '[Loss {:.4f}[avg:{:.4f}], Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]],'
                       ' LR {:.6f}'.format(cur_epoch, batch_idx, len(train_iterator),
                                           batch_time.val, losses.val, losses.avg,
                                           top1.val, top1.avg, top5.val, top5.avg,
                                           lr_scheduler.get_lr()[0]), log_file)
    print_and_save("Epoch train time: {}".format(batch_time.sum), log_file)
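# train_cnn relies on mixup helpers that are defined elsewhere in the repo.
# For context, a minimal sketch following the public mixup reference
# implementation (Zhang et al., 2018, mixup-cifar10); the repo's own helpers
# may differ in detail:

def mixup_data(x, y, alpha):
    # draw the convex-combination weight and a shuffled pairing of the batch
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # the loss is the same convex combination of the two targets' losses
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)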
def eval_final_print(video_preds, video_labels, cls_type, annotations_path, val_list,
                     max_classes, log_file):
    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(
        video_preds, video_labels, all_class_indices=list(range(int(max_classes))))
    print_and_save(cls_type, log_file)
    print_and_save(cf, log_file)
    if annotations_path:
        brd_splits = '_brd' in val_list
        valid_verb_indices, verb_ids_sorted, valid_noun_indices, noun_ids_sorted = get_classes(
            annotations_path, val_list, brd_splits, 100)
        if cls_type == 'Verbs':
            valid_indices, ids_sorted = valid_verb_indices, verb_ids_sorted
            # verb classes are set manually to avoid loading the verb names file, which loads 125
            all_indices = list(range(125))
        else:
            valid_indices, ids_sorted = valid_noun_indices, noun_ids_sorted
            all_indices = list(range(352))
        ave_pre, ave_rec, _ = avg_rec_prec_trimmed(video_preds, video_labels,
                                                   valid_indices, all_indices)
        print_and_save("{} > 100 instances at training:".format(cls_type), log_file)
        print_and_save("Classes are {}".format(valid_indices), log_file)
        print_and_save("average precision {:.2f}%, average recall {:.2f}%".format(ave_pre, ave_rec),
                       log_file)
        print_and_save("Most common {} in training".format(cls_type), log_file)
        print_and_save("15 {} rec {}".format(cls_type, recall[ids_sorted[:15]]), log_file)
        print_and_save("15 {} pre {}".format(cls_type, precision[ids_sorted[:15]]), log_file)
    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)
    return mean_cls_acc, top1_acc
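# analyze_preds_labels is a repo utility whose definition is not shown here.
# For orientation, a hypothetical minimal sketch of the statistics it returns,
# assuming a standard confusion-matrix treatment (the name
# analyze_preds_labels_sketch and the exact averaging choices are assumptions):

def analyze_preds_labels_sketch(preds, labels, all_class_indices):
    n = len(all_class_indices)
    cf = np.zeros((n, n), dtype=np.int64)  # rows: true class, cols: predicted class
    for p, l in zip(preds, labels):
        cf[int(l), int(p)] += 1
    recall = np.diag(cf) / np.maximum(cf.sum(axis=1), 1)      # per-class recall
    precision = np.diag(cf) / np.maximum(cf.sum(axis=0), 1)   # per-class precision
    cls_acc = recall                                          # per-class accuracy equals recall
    mean_cls_acc = 100.0 * recall[cf.sum(axis=1) > 0].mean()  # mean over classes present
    top1_acc = 100.0 * np.diag(cf).sum() / np.maximum(cf.sum(), 1)
    return cf, recall, precision, cls_acc, mean_cls_acc, top1_acc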
def main():
    args = parse_args('mfnet', val=True)
    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

    if not args.double_output:
        mfnet_3d = MFNET_3D
        num_classes = args.verb_classes
        validate = validate_resnet
        overall_top1, overall_mean_cls_acc = 0.0, 0.0
    else:
        mfnet_3d = MFNET_3D_DO
        num_classes = (args.verb_classes, args.noun_classes)
        validate = validate_resnet_do
        overall_top1, overall_mean_cls_acc = (0.0, 0.0), (0.0, 0.0)

    model_ft = mfnet_3d(num_classes)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()
    for i in range(args.mfnet_eval):
        crop_type = CenterCrop((224, 224)) if args.eval_crop == 'center' else RandomCrop((224, 224))
        if args.eval_sampler == 'middle':
            val_sampler = MiddleSampling(num=args.clip_length)
        else:
            val_sampler = RandomSampling(num=args.clip_length, interval=args.frame_interval,
                                         speed=[1.0, 1.0], seed=i)
        val_transforms = transforms.Compose([Resize((256, 256), False), crop_type,
                                             ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
        val_loader = VideoDatasetLoader(val_sampler, args.val_list, num_classes=num_classes,
                                        batch_transform=val_transforms,
                                        img_tmpl='frame_{:010d}.jpg', validation=True)
        val_iter = torch.utils.data.DataLoader(val_loader, batch_size=args.batch_size,
                                               shuffle=False, num_workers=args.num_workers,
                                               pin_memory=True)

        top1, outputs = validate(model_ft, ce_loss, val_iter, checkpoint['epoch'],
                                 args.val_list.split("\\")[-1], log_file)

        if not isinstance(top1, tuple):
            video_preds = [x[0] for x in outputs]
            video_labels = [x[1] for x in outputs]
            mean_cls_acc, top1_acc = eval_final_print(video_preds, video_labels, "Verbs",
                                                      args.annotations_path, args.val_list,
                                                      num_classes, log_file)
            overall_mean_cls_acc += mean_cls_acc
            overall_top1 += top1_acc
        else:
            video_preds_a, video_preds_b = [x[0] for x in outputs[0]], [x[0] for x in outputs[1]]
            video_labels_a, video_labels_b = [x[1] for x in outputs[0]], [x[1] for x in outputs[1]]
            mean_cls_acc_a, top1_acc_a = eval_final_print(video_preds_a, video_labels_a, "Verbs",
                                                          args.annotations_path, args.val_list,
                                                          num_classes, log_file)
            mean_cls_acc_b, top1_acc_b = eval_final_print(video_preds_b, video_labels_b, "Nouns",
                                                          args.annotations_path, args.val_list,
                                                          num_classes, log_file)
            overall_mean_cls_acc = (overall_mean_cls_acc[0] + mean_cls_acc_a,
                                    overall_mean_cls_acc[1] + mean_cls_acc_b)
            overall_top1 = (overall_top1[0] + top1_acc_a, overall_top1[1] + top1_acc_b)
        print_and_save("", log_file)

    if not isinstance(top1, tuple):
        print_and_save("Mean Cls Acc {}".format(overall_mean_cls_acc / args.mfnet_eval), log_file)
        print_and_save("Dataset Acc ({} times) {}".format(args.mfnet_eval,
                                                          overall_top1 / args.mfnet_eval), log_file)
    else:
        print_and_save("Mean Cls Acc a {}, b {}".format(overall_mean_cls_acc[0] / args.mfnet_eval,
                                                        overall_mean_cls_acc[1] / args.mfnet_eval),
                       log_file)
        print_and_save("Dataset Acc ({} times) a {}, b {}".format(
            args.mfnet_eval, overall_top1[0] / args.mfnet_eval,
            overall_top1[1] / args.mfnet_eval), log_file)
start_decay = 0.001
batch_size = 128
max_epochs = 100
lr_range = [0.001, 0.002, 0.005, 0.008, 0.01, 0.02, 0.04, 0.07, 0.1, 0.5, 1., 2., 3.]
log_file = os.path.join(base_output_dir, model_name, model_name + "_{}.txt".format("range"))

model_ft = LSTM_Hands(lstm_input, lstm_hidden, lstm_layers, verb_classes, dropout)
model_ft = torch.nn.DataParallel(model_ft).cuda()
cudnn.benchmark = True

params_to_update = model_ft.parameters()
print_and_save("Params to learn:", log_file)
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        print_and_save("\t{}".format(name), log_file)

optimizer = torch.optim.SGD(params_to_update, lr=start_lr, momentum=start_mom,
                            weight_decay=start_decay)
lr_scheduler = LRRangeTest(optimizer, lr_range, max_epochs)
ce_loss = torch.nn.CrossEntropyLoss().cuda()
train_loader = PointDatasetLoader(train_list, max_seq_length=16, norm_val=norm_val)
def train_cnn_do(model, optimizer, criterion, train_iterator, mixup_alpha, cur_epoch,
                 log_file, gpus, lr_scheduler=None):
    batch_time, losses_a, losses_b, losses = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    top1_a, top5_a, top1_b, top5_b = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    model.train()

    if lr_scheduler is not None and not isinstance(lr_scheduler, CyclicLR):
        lr_scheduler.step()

    print_and_save('*********', log_file)
    print_and_save('Beginning of epoch: {}'.format(cur_epoch), log_file)
    t0 = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_iterator):
        if isinstance(lr_scheduler, CyclicLR):
            lr_scheduler.step()
        inputs = torch.tensor(inputs, requires_grad=True).cuda(gpus[0])
        targets_a = torch.tensor(targets[0]).cuda(gpus[0])
        targets_b = torch.tensor(targets[1]).cuda(gpus[0])

        output_a, output_b = model(inputs)
        loss_a = criterion(output_a, targets_a)
        loss_b = criterion(output_b, targets_b)
        loss = 0.75 * loss_a + 0.25 * loss_b

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        t1_a, t5_a = accuracy(output_a.detach().cpu(), targets_a.detach().cpu(), topk=(1, 5))
        t1_b, t5_b = accuracy(output_b.detach().cpu(), targets_b.detach().cpu(), topk=(1, 5))
        top1_a.update(t1_a.item(), output_a.size(0))
        top5_a.update(t5_a.item(), output_a.size(0))
        top1_b.update(t1_b.item(), output_b.size(0))
        top5_b.update(t5_b.item(), output_b.size(0))
        losses_a.update(loss_a.item(), output_a.size(0))
        losses_b.update(loss_b.item(), output_b.size(0))
        losses.update(loss.item(), output_a.size(0))
        batch_time.update(time.time() - t0)
        t0 = time.time()

        to_print = '[Epoch:{}, Batch {}/{} in {:.3f} s]' \
                   '[Losses {:.4f}[avg:{:.4f}], loss_a {:.4f}[avg:{:.4f}], loss_b {:.4f}[avg:{:.4f}], ' \
                   'Top1_a {:.3f}[avg:{:.3f}], Top5_a {:.3f}[avg:{:.3f}], ' \
                   'Top1_b {:.3f}[avg:{:.3f}], Top5_b {:.3f}[avg:{:.3f}]], ' \
                   'LR {:.6f}'.format(
                       cur_epoch, batch_idx, len(train_iterator), batch_time.val,
                       losses.val, losses.avg, losses_a.val, losses_a.avg,
                       losses_b.val, losses_b.avg,
                       top1_a.val, top1_a.avg, top5_a.val, top5_a.avg,
                       top1_b.val, top1_b.avg, top5_b.val, top5_b.avg,
                       lr_scheduler.get_lr()[0])
        print_and_save(to_print, log_file)
def main():
    args, model_name = parse_args('mfnet', val=False)
    output_dir, log_file = init_folders(args.base_output_dir, model_name, args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D if not args.double_output else MFNET_3D_DO
    num_classes = args.verb_classes if not args.double_output else (args.verb_classes,
                                                                    args.noun_classes)
    model_ft = mfnet_3d(num_classes, dropout=args.dropout)
    if args.pretrained:
        checkpoint = torch.load(args.pretrained_model_path)
        # the name remap below is needed if the network was trained with DataParallel
        base_dict = {'.'.join(k.split('.')[1:]): v
                     for k, v in list(checkpoint['state_dict'].items())}
        base_dict = {k: v for k, v in list(base_dict.items()) if 'classifier' not in k}
        model_ft.load_state_dict(base_dict, strict=False)
        # model.load_state_dict(checkpoint['state_dict'])
    model_ft.cuda(device=args.gpus[0])
    model_ft = torch.nn.DataParallel(model_ft, device_ids=args.gpus, output_device=args.gpus[0])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    # load dataset and train and validation iterators
    train_sampler = prepare_sampler("train", args.clip_length, args.frame_interval)
    train_transforms = transforms.Compose([
        RandomScale(make_square=True, aspect_ratio=[0.8, 1. / 0.8], slen=[224, 288]),
        RandomCrop((224, 224)), RandomHorizontalFlip(), RandomHLS(vars=[15, 35, 25]),
        ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    train_loader = VideoDatasetLoader(train_sampler, args.train_list, num_classes=num_classes,
                                      batch_transform=train_transforms,
                                      img_tmpl='frame_{:010d}.jpg')
    train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=args.batch_size,
                                                 shuffle=True, num_workers=args.num_workers,
                                                 pin_memory=True)

    test_sampler = prepare_sampler("val", args.clip_length, args.frame_interval)
    test_transforms = transforms.Compose([Resize((256, 256), False), CenterCrop((224, 224)),
                                          ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    test_loader = VideoDatasetLoader(test_sampler, args.test_list, num_classes=num_classes,
                                     batch_transform=test_transforms,
                                     img_tmpl='frame_{:010d}.jpg')
    test_iterator = torch.utils.data.DataLoader(test_loader, batch_size=args.batch_size,
                                                shuffle=False, num_workers=args.num_workers,
                                                pin_memory=True)

    # config optimizer
    param_base_layers = []
    param_new_layers = []
    name_base_layers = []
    for name, param in model_ft.named_parameters():
        if args.pretrained:
            if 'classifier' in name:
                param_new_layers.append(param)
            else:
                param_base_layers.append(param)
                name_base_layers.append(name)
        else:
            param_new_layers.append(param)

    optimizer = torch.optim.SGD([{'params': param_base_layers, 'lr_mult': args.lr_mult_base},
                                 {'params': param_new_layers, 'lr_mult': args.lr_mult_new}],
                                lr=args.lr, momentum=args.momentum, weight_decay=args.decay,
                                nesterov=True)

    if args.resume and args.pretrained and 'optimizer' in checkpoint:
        # 'checkpoint' only exists when a pretrained model was loaded above
        optimizer.load_state_dict(checkpoint['optimizer'])

    ce_loss = torch.nn.CrossEntropyLoss().cuda(device=args.gpus[0])
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer, len(train_iterator))

    if not args.double_output:
        new_top1, top1 = 0.0, 0.0
    else:
        new_top1, top1 = (0.0, 0.0), (0.0, 0.0)
    train = train_cnn if not args.double_output else train_cnn_do
    test = test_cnn if not args.double_output else test_cnn_do

    for epoch in range(args.max_epochs):
        train(model_ft, optimizer, ce_loss, train_iterator, args.mixup_a, epoch,
              log_file, args.gpus, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test(model_ft, ce_loss, train_iterator, epoch, "Train", log_file, args.gpus)
            new_top1 = test(model_ft, ce_loss, test_iterator, epoch, "Test", log_file, args.gpus)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1, args.save_all_weights,
                                    output_dir, model_name, epoch, log_file)
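# The param groups above carry a custom 'lr_mult' key; torch.optim.SGD simply
# stores unknown keys on the group, so something downstream has to apply them.
# A hypothetical sketch of how the schedules built by load_lr_scheduler might
# consume it (the actual implementation lives in the repo's lr-scheduling
# utils, and apply_lr_mult is an assumed name):

def apply_lr_mult(optimizer, base_lr):
    # scale the scheduled base learning rate by each group's multiplier
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1.0)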
def main():
    args, model_name = parse_args('mfnet', val=False)
    output_dir, log_file = init_folders(args.base_output_dir, model_name, args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D_MO  # mfnet 3d multi output
    kwargs = {}
    num_coords = 0
    objectives_text = "Objectives: "
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    num_objectives = 0
    if args.action_classes > 0:  # planned for use in EPIC
        objectives_text += " actions {}, ".format(args.action_classes)
        num_objectives += 1
    if args.verb_classes > 0:
        objectives_text += " verbs {}, ".format(args.verb_classes)
        num_objectives += 1
    if args.noun_classes > 0:
        objectives_text += " nouns {}, ".format(args.noun_classes)
        num_objectives += 1
    # if args.use_gaze:  # unused in EPIC
    #     objectives_text += " gaze, "
    #     num_coords += 1
    #     num_objectives += 1
    if args.use_hands:
        objectives_text += " hands, "
        num_coords += 2
        num_objectives += 1
    kwargs["num_coords"] = num_coords
    print_and_save("Training for {} objective(s)".format(num_objectives), log_file)
    print_and_save(objectives_text, log_file)

    # for now, limit the tasks to at most 3 and do not take extra nouns into account
    model_ft = mfnet_3d(num_classes, dropout=args.dropout, **kwargs)
    if args.pretrained:
        checkpoint = torch.load(args.pretrained_model_path)
        # the name remap below is needed if the network was trained with DataParallel
        base_dict = {'.'.join(k.split('.')[1:]): v
                     for k, v in list(checkpoint['state_dict'].items())}
        base_dict = {k: v for k, v in list(base_dict.items()) if 'classifier' not in k}
        model_ft.load_state_dict(base_dict, strict=False)
        # model.load_state_dict(checkpoint['state_dict'])
    model_ft.cuda(device=args.gpus[0])
    model_ft = torch.nn.DataParallel(model_ft, device_ids=args.gpus, output_device=args.gpus[0])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    if args.resume:
        # Note: when resuming the 1task+hand models from before 18-June-19,
        # MFNET_3D from mfnet_3d_hands.py should be used instead
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir, model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path), log_file)

    # load train-val samplers
    train_sampler = prepare_sampler("train", args.clip_length, args.frame_interval)
    train_transforms = transforms.Compose([
        RandomScale(make_square=True, aspect_ratio=[0.8, 1. / 0.8], slen=[224, 288]),
        RandomCrop((224, 224)), RandomHorizontalFlip(), RandomHLS(vars=[15, 35, 25]),
        ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    train_loader = VideoAndPointDatasetLoader(train_sampler, args.train_list,
                                              point_list_prefix=args.bpv_prefix,
                                              num_classes=num_classes,
                                              img_tmpl='frame_{:010d}.jpg',
                                              norm_val=[456., 256., 456., 256.],
                                              batch_transform=train_transforms,
                                              use_hands=args.use_hands)
    train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=args.batch_size,
                                                 shuffle=True, num_workers=args.num_workers,
                                                 pin_memory=True)

    test_sampler = prepare_sampler("val", args.clip_length, args.frame_interval)
    test_transforms = transforms.Compose([Resize((256, 256), False), CenterCrop((224, 224)),
                                          ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
    # make train-val dataset loaders
    test_loader = VideoAndPointDatasetLoader(test_sampler, args.test_list,
                                             point_list_prefix=args.bpv_prefix,
                                             num_classes=num_classes,
                                             img_tmpl='frame_{:010d}.jpg',
                                             norm_val=[456., 256., 456., 256.],
                                             batch_transform=test_transforms,
                                             use_hands=args.use_hands)
    test_iterator = torch.utils.data.DataLoader(test_loader, batch_size=args.batch_size,
                                                shuffle=False, num_workers=args.num_workers,
                                                pin_memory=True)

    # config optimizer
    param_base_layers = []
    param_new_layers = []
    name_base_layers = []
    for name, param in model_ft.named_parameters():
        if args.pretrained:
            if 'classifier' in name:
                param_new_layers.append(param)
            else:
                param_base_layers.append(param)
                name_base_layers.append(name)
        else:
            param_new_layers.append(param)

    optimizer = torch.optim.SGD([{'params': param_base_layers, 'lr_mult': args.lr_mult_base},
                                 {'params': param_new_layers, 'lr_mult': args.lr_mult_new}],
                                lr=args.lr, momentum=args.momentum, weight_decay=args.decay,
                                nesterov=True)

    # if args.resume and 'optimizer' in checkpoint:
    #     optimizer.load_state_dict(checkpoint['optimizer'])

    ce_loss = torch.nn.CrossEntropyLoss().cuda(device=args.gpus[0])
    # mse_loss = torch.nn.MSELoss().cuda(device=args.gpus[0])
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer, len(train_iterator))

    train = train_mfnet_mo
    test = test_mfnet_mo
    num_valid_classes = len([cls for cls in num_classes if cls > 0])
    new_top1, top1 = [0.0] * num_valid_classes, [0.0] * num_valid_classes
    for epoch in range(args.max_epochs):
        train(model_ft, optimizer, ce_loss, train_iterator, num_valid_classes, False,
              args.use_hands, epoch, log_file, args.gpus, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test(model_ft, ce_loss, train_iterator, num_valid_classes, False,
                     args.use_hands, epoch, "Train", log_file, args.gpus)
            new_top1 = test(model_ft, ce_loss, test_iterator, num_valid_classes, False,
                            args.use_hands, epoch, "Test", log_file, args.gpus)
            top1 = save_mt_checkpoints(model_ft, optimizer, top1, new_top1,
                                       args.save_all_weights, output_dir, model_name,
                                       epoch, log_file)
def main():
    args, model_name = parse_args('lstm_diffs', val=False)
    # init dirs, names
    output_dir, log_file = init_folders(args.base_output_dir, model_name, args.resume, args.logging)
    print_and_save(args, log_file)
    print_and_save("Model name: {}".format(model_name), log_file)
    cudnn.benchmark = True

    # init model
    lstm_model = LSTM_Hands
    kwargs = {'dropout': args.dropout, 'bidir': args.lstm_bidir}
    model_ft = lstm_model(args.lstm_input, args.lstm_hidden, args.lstm_layers,
                          args.verb_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    print_and_save("Model loaded to gpu", log_file)
    if args.resume:
        model_ft, ckpt_path = resume_checkpoint(model_ft, output_dir, model_name, args.resume_from)
        print_and_save("Resuming training from: {}".format(ckpt_path), log_file)

    norm_val = [1., 1., 1., 1.] if args.no_norm_input else [456., 256., 456., 256.]
    # train_loader = PointPolarDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size,
    #                                        norm_val=norm_val)
    # test_loader = PointPolarDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size,
    #                                       norm_val=norm_val)
    # train_loader = AnglesDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size)
    # test_loader = AnglesDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size)
    train_loader = PointDiffDatasetLoader(args.train_list, max_seq_length=args.lstm_seq_size,
                                          norm_val=norm_val)
    test_loader = PointDiffDatasetLoader(args.test_list, max_seq_length=args.lstm_seq_size,
                                         norm_val=norm_val)
    train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=args.batch_size,
                                                 shuffle=True, num_workers=args.num_workers,
                                                 collate_fn=lstm_collate, pin_memory=True)
    test_iterator = torch.utils.data.DataLoader(test_loader, batch_size=args.batch_size,
                                                shuffle=False, num_workers=args.num_workers,
                                                collate_fn=lstm_collate, pin_memory=True)

    params_to_update = model_ft.parameters()
    print_and_save("Params to learn:", log_file)
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print_and_save("\t{}".format(name), log_file)

    optimizer = torch.optim.SGD(params_to_update, lr=args.lr,
                                momentum=args.momentum, weight_decay=args.decay)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()
    lr_scheduler = load_lr_scheduler(args.lr_type, args.lr_steps, optimizer, len(train_iterator))
    train_fun, test_fun = (train_lstm, test_lstm)

    new_top1, top1 = 0.0, 0.0
    for epoch in range(args.max_epochs):
        train_fun(model_ft, optimizer, ce_loss, train_iterator, epoch, log_file, lr_scheduler)
        if (epoch + 1) % args.eval_freq == 0:
            if args.eval_on_train:
                test_fun(model_ft, ce_loss, train_iterator, epoch, "Train", log_file)
            new_top1 = test_fun(model_ft, ce_loss, test_iterator, epoch, "Test", log_file)
            top1 = save_checkpoints(model_ft, optimizer, top1, new_top1, args.save_all_weights,
                                    output_dir, model_name, epoch, log_file)
def eval_final_print_mt(video_preds, video_labels, task_id, current_classes, log_file,
                        annotations_path=None, val_list=None, task_type='None',
                        action_file=None):
    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(
        video_preds, video_labels, all_class_indices=list(range(int(current_classes))))
    print_and_save("Task {}".format(task_id), log_file)
    print_and_save(cf, log_file)
    if annotations_path:
        brd_splits = '_brd' in val_list
        valid_verb_indices, verb_ids_sorted, valid_noun_indices, noun_ids_sorted = get_classes(
            annotations_path, val_list, brd_splits, 100)
        if task_type == 'EpicActions':
            valid_indices = get_action_classes(annotations_path, val_list, brd_splits, 100,
                                               action_file)
            all_indices = list(range(2521))
        elif task_type == 'EpicVerbs':  # not plain 'Verbs': error prone if nouns are ever trained on their own
            valid_indices, ids_sorted = valid_verb_indices, verb_ids_sorted
            # verb classes are set manually to avoid loading the verb names file, which loads 125
            all_indices = list(range(125))
        elif task_type == 'EpicNouns':
            valid_indices, ids_sorted = valid_noun_indices, noun_ids_sorted
            all_indices = list(range(352))
        ave_pre, ave_rec, _ = avg_rec_prec_trimmed(video_preds, video_labels,
                                                   valid_indices, all_indices)
        print_and_save("{} > 100 instances at training:".format(task_type), log_file)
        print_and_save("Classes are {}".format(valid_indices), log_file)
        print_and_save("average precision {:.2f}%, average recall {:.2f}%".format(ave_pre, ave_rec),
                       log_file)
        if task_type != 'EpicActions':
            print_and_save("Most common {} in training".format(task_type), log_file)
            print_and_save("15 {} rec {}".format(task_type, recall[ids_sorted[:15]]), log_file)
            print_and_save("15 {} pre {}".format(task_type, precision[ids_sorted[:15]]), log_file)
    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)
    return mean_cls_acc, top1_acc
def validate_lstm_attn(model, criterion, test_iterator, cur_epoch, dataset, log_file, args):
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()
    predictions = []
    # for attention
    all_predictions = torch.zeros((0, args.lstm_seq_size, 1))
    all_attentions = torch.zeros((0, args.lstm_seq_size, args.lstm_seq_size))
    all_targets = torch.zeros((0, 1))
    all_video_names = []
    num_changing_in_seq = 0
    for_the_better, for_the_worse = [], []
    print_and_save('Evaluating after epoch: {} on {} set'.format(cur_epoch, dataset), log_file)
    with torch.no_grad():
        model.eval()
        for batch_idx, (inputs, seq_lengths, targets, video_names) in enumerate(test_iterator):
            inputs = torch.tensor(inputs).cuda()
            targets = torch.tensor(targets).cuda()
            inputs = inputs.transpose(1, 0)

            outputs, attn_weights = model(inputs, seq_lengths)
            loss = 0
            for output in outputs:
                loss += criterion(output, targets)
            loss /= len(outputs)

            # per-step predictions for every step of the sequence, for the whole batch
            outputs = torch.stack(outputs)
            outputs = torch.argmax(outputs, dim=2).detach().cpu()
            all_predictions = torch.cat((all_predictions,
                                         torch.transpose(outputs, 0, 1).float()), dim=0)
            outputs = outputs.numpy()
            # the argmax over the bincount performs majority voting across the steps
            maj_vote = [np.bincount(outputs[:, kk]).argmax() for kk in range(len(outputs[0]))]
            for i in range(len(maj_vote)):  # iterate over the batch
                if (outputs[:, i] != outputs[-1, i]).any():
                    num_changing_in_seq += 1
                # compare the majority vote to the prediction of the last step in the sequence
                if maj_vote[i] != outputs[-1, i]:
                    tar = targets[i].cpu().numpy()
                    if maj_vote[i] == tar:
                        for_the_better.append(video_names[i])
                    elif outputs[-1, i] == tar:
                        for_the_worse.append(video_names[i])
            outputs = maj_vote

            attn_weights = torch.transpose(torch.stack(attn_weights), 0, 1).detach().cpu()
            all_attentions = torch.cat((all_attentions, attn_weights), dim=0)
            all_targets = torch.cat((all_targets, targets.detach().cpu().float()), dim=0)
            all_video_names = all_video_names + video_names

            batch_preds = []
            for j in range(len(outputs)):
                res = outputs[j]
                label = targets[j].cpu().numpy()
                predictions.append([res, label])
                batch_preds.append("{}, P-L:{}-{}".format(video_names[j], res, label))

            # use 'output' (the last step's logits) for accuracy;
            # 'outputs' (the majority vote) is used for the confusion matrix
            t1, t5 = accuracy(output.detach().cpu(), targets.detach().cpu(), topk=(1, 5))
            top1.update(t1.item(), output.size(0))
            top5.update(t5.item(), output.size(0))
            losses.update(loss.item(), output.size(0))

            print_and_save('[Batch {}/{}][Top1 {:.3f}[avg:{:.3f}], Top5 {:.3f}[avg:{:.3f}]]\n\t{}'
                           .format(batch_idx, len(test_iterator), top1.val, top1.avg,
                                   top5.val, top5.avg, batch_preds), log_file)
    print_and_save("Num samples with differences {}".format(num_changing_in_seq), log_file)
    print_and_save("{} changed for the better\n{}".format(len(for_the_better), for_the_better),
                   log_file)
    print_and_save("{} changed for the worse\n{}".format(len(for_the_worse), for_the_worse),
                   log_file)
    print_and_save('{} Results: Loss {:.3f}, Top1 {:.3f}, Top5 {:.3f}'
                   .format(dataset, losses.avg, top1.avg, top5.avg), log_file)

    if args.save_attentions:
        all_predictions = all_predictions.numpy().astype(int)
        all_targets = all_targets.numpy().astype(int)
        output_dir = os.path.join(os.path.dirname(log_file), "figures")
        os.makedirs(output_dir, exist_ok=True)
        for i in range(len(all_targets)):
            name_parts = all_video_names[i].split("\\")[-2:]
            output_file = os.path.join(output_dir,
                                       "{}_{}.png".format(name_parts[0],
                                                          name_parts[1].split('.')[0]))
            showAttention(args.lstm_seq_size, all_predictions[i], all_targets[i],
                          all_attentions[i], output_file)
    return top1.avg, predictions
def main():
    args = parse_args('mfnet', val=True)
    output_dir = os.path.dirname(args.ckpt_path)
    log_file = make_log_file_name(output_dir, args)
    print_and_save(args, log_file)
    cudnn.benchmark = True

    mfnet_3d = MFNET_3D_MO
    num_classes = [args.action_classes, args.verb_classes, args.noun_classes]
    validate = validate_mfnet_mo

    kwargs = {}
    num_coords = 0
    if args.use_gaze:
        num_coords += 1
    if args.use_hands:
        num_coords += 2
    kwargs['num_coords'] = num_coords

    model_ft = mfnet_3d(num_classes, **kwargs)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path, map_location={'cuda:1': 'cuda:0'})
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded on gpu {} devices".format(args.gpus), log_file)

    ce_loss = torch.nn.CrossEntropyLoss().cuda()
    num_valid_classes = len([cls for cls in num_classes if cls > 0])
    valid_classes = [cls for cls in num_classes if cls > 0]
    overall_top1 = [0] * num_valid_classes
    overall_mean_cls_acc = [0] * num_valid_classes
    for i in range(args.mfnet_eval):
        crop_type = CenterCrop((224, 224)) if args.eval_crop == 'center' else RandomCrop((224, 224))
        if args.eval_sampler == 'middle':
            val_sampler = MiddleSampling(num=args.clip_length)
        else:
            val_sampler = RandomSampling(num=args.clip_length, interval=args.frame_interval,
                                         speed=[1.0, 1.0], seed=i)
        val_transforms = transforms.Compose([Resize((256, 256), False), crop_type,
                                             ToTensorVid(), Normalize(mean=mean_3d, std=std_3d)])
        # val_loader = FromVideoDatasetLoaderGulp(val_sampler, args.val_list, 'GTEA', num_classes,
        #                                         GTEA_CLASSES, use_gaze=args.use_gaze,
        #                                         gaze_list_prefix=args.gaze_list_prefix,
        #                                         use_hands=args.use_hands,
        #                                         hand_list_prefix=args.hand_list_prefix,
        #                                         batch_transform=val_transforms,
        #                                         extra_nouns=False, validation=True)
        val_loader = VideoFromImagesDatasetLoader(val_sampler, args.val_list, 'GTEA', num_classes,
                                                  GTEA_CLASSES, use_gaze=args.use_gaze,
                                                  gaze_list_prefix=args.gaze_list_prefix,
                                                  use_hands=args.use_hands,
                                                  hand_list_prefix=args.hand_list_prefix,
                                                  batch_transform=val_transforms,
                                                  extra_nouns=False, validation=True)
        val_iter = torch.utils.data.DataLoader(val_loader, batch_size=args.batch_size,
                                               shuffle=False, num_workers=args.num_workers,
                                               pin_memory=True)

        # evaluate dataset
        top1, outputs = validate(model_ft, ce_loss, val_iter, num_valid_classes, args.use_gaze,
                                 args.use_hands, checkpoint['epoch'],
                                 args.val_list.split("\\")[-1], log_file)

        # calculate statistics
        for ind in range(num_valid_classes):
            video_preds = [x[0] for x in outputs[ind]]
            video_labels = [x[1] for x in outputs[ind]]
            mean_cls_acc, top1_acc = eval_final_print_mt(video_preds, video_labels, ind,
                                                         valid_classes[ind], log_file)
            overall_mean_cls_acc[ind] += mean_cls_acc
            overall_top1[ind] += top1_acc
        print_and_save("", log_file)

    text_mean_cls_acc = "Mean Cls Acc ({} times)".format(args.mfnet_eval)
    text_dataset_acc = "Dataset Acc ({} times)".format(args.mfnet_eval)
    for ind in range(num_valid_classes):
        text_mean_cls_acc += ", T{}::{} ".format(ind, overall_mean_cls_acc[ind] / args.mfnet_eval)
        text_dataset_acc += ", T{}::{} ".format(ind, overall_top1[ind] / args.mfnet_eval)
    print_and_save(text_mean_cls_acc, log_file)
    print_and_save(text_dataset_acc, log_file)
def main():
    args = parse_args('resnet', val=True)
    output_dir = os.path.dirname(args.ckpt_path)
    log_file = os.path.join(output_dir, "results-accuracy-validation.txt") if args.logging else None
    print_and_save(args, log_file)
    cudnn.benchmark = True

    model_ft = resnet_loader(args.verb_classes, 0, False, False, args.resnet_version,
                             1 if args.channels == 'G' else 3, args.no_resize)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    checkpoint = torch.load(args.ckpt_path)
    # the remap below is needed if the network was trained with DataParallel
    # and the model is now instantiated without it
    # base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
    # model_ft.load_state_dict(base_dict)
    model_ft.load_state_dict(checkpoint['state_dict'])
    print_and_save("Model loaded to gpu", log_file)

    mean = meanRGB if args.channels == 'RGB' else meanG
    std = stdRGB if args.channels == 'RGB' else stdG
    normalize = transforms.Normalize(mean=mean, std=std)

    if args.no_resize:
        resize = WidthCrop()
    else:
        if args.pad:
            # the binarize argument is currently fixed to False
            resize = ResizePadFirst(224, False, interpolation_methods[args.inter])
        else:
            resize = Resize((224, 224), False, interpolation_methods[args.inter])
    test_transforms = transforms.Compose([resize, To01Range(args.bin_img),
                                          transforms.ToTensor(), normalize])

    dataset_loader = ImageDatasetLoader(args.val_list, num_classes=args.verb_classes,
                                        batch_transform=test_transforms,
                                        channels=args.channels, validation=True)
    collate_fn = torch.utils.data.dataloader.default_collate
    dataset_iterator = torch.utils.data.DataLoader(dataset_loader, batch_size=args.batch_size,
                                                   num_workers=args.num_workers,
                                                   collate_fn=collate_fn, pin_memory=True)
    ce_loss = torch.nn.CrossEntropyLoss().cuda()

    validate = validate_resnet
    top1, outputs = validate(model_ft, ce_loss, dataset_iterator, checkpoint['epoch'],
                             args.val_list.split("\\")[-1], log_file)

    # video_pred = [np.argmax(x[0].detach().cpu().numpy()) for x in outputs]
    # video_labels = [x[1].cpu().numpy() for x in outputs]
    video_preds = [x[0] for x in outputs]
    video_labels = [x[1] for x in outputs]
    cf, recall, precision, cls_acc, mean_cls_acc, top1_acc = analyze_preds_labels(video_preds,
                                                                                  video_labels)
    print_and_save(cf, log_file)
    print_and_save("Cls Rec {}".format(recall), log_file)
    print_and_save("Cls Pre {}".format(precision), log_file)
    print_and_save("Cls Acc {}".format(cls_acc), log_file)
    print_and_save("Mean Cls Acc {:.02f}%".format(mean_cls_acc), log_file)
    print_and_save("Dataset Acc {}".format(top1_acc), log_file)