def main(args):
    """Evaluate a step-size-conditioned encoder on every test session.

    For each step size in ``args.step_size`` the per-frame class scores are
    collected in ``enc_score_metrics[i]``; frames earlier than the step size
    receive a fixed one-hot background score. Results are then computed per
    step size with ``utl.compute_result_multilabel``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # One score list per step size; the target list is shared by all of them.
    enc_score_metrics = [[] for _ in range(len(args.step_size))]
    enc_target_metrics = []

    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint)
    else:
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)  # evaluation mode
    softmax = nn.Softmax(dim=1).to(device)

    # One-hot "background" (class 0) scores used to pad frames that have no
    # prediction yet: THUMOS has 22 classes, TVSeries has 31.
    thumos_background_score = np.array(
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    tvseries_background_score = np.array([
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0
    ])

    for session_idx, session in enumerate(args.test_session_set, start=1):
        start = time.time()
        with torch.set_grad_enabled(False):
            camera_inputs = np.load(osp.join(args.data_root,
                                             args.camera_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            motion_inputs = np.load(osp.join(args.data_root,
                                             args.motion_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))

            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)
            # NOTE(review): the LSTM state is NOT reset between step sizes
            # below, so step size i+1 starts from the hidden state left by
            # step size i — confirm this carry-over is intended.

            # Targets are appended once per frame (shared by all step sizes).
            for l in range(target.shape[0]):
                enc_target_metrics.append(target[l])

            for i, steps in enumerate(args.step_size):
                step = int(steps)
                for l in range(target.shape[0]):
                    if l < step:
                        # No features available `step` frames back yet:
                        # emit the dataset's background one-hot score.
                        if args.dataset == 'THUMOS':
                            enc_score_metrics[i].append(
                                thumos_background_score)
                        elif args.dataset == 'TVSeries':
                            enc_score_metrics[i].append(
                                tvseries_background_score)
                    else:
                        # Feed the features from `step` frames in the past.
                        camera_input = to_device(
                            torch.as_tensor(camera_inputs[l - step].astype(
                                np.float32)), device)
                        motion_input = to_device(
                            torch.as_tensor(motion_inputs[l - step].astype(
                                np.float32)), device)
                        enc_hx, enc_cx, enc_score = \
                            model.step(camera_input, motion_input,
                                       enc_hx, enc_cx, step)
                        if args.dirichlet:
                            # Dirichlet head already produces normalized
                            # scores; no softmax needed.
                            enc_score_metrics[i].append(
                                enc_score.cpu().numpy()[0])
                        else:
                            enc_score_metrics[i].append(
                                softmax(enc_score).cpu().numpy()[0])
        end = time.time()

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, session_idx, len(args.test_session_set),
                end - start))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')

    # Compute result for encoder, one result per step size.
    if args.dataset == "THUMOS":
        for i, steps in enumerate(args.step_size):
            print('Step size: ', steps)
            print(len(enc_score_metrics[i]))
            print(len(enc_target_metrics))
            utl.compute_result_multilabel(args.dataset,
                                          args.class_index,
                                          enc_score_metrics[i],
                                          enc_target_metrics,
                                          save_dir,
                                          result_file,
                                          ignore_class=[0, 21],
                                          save=True,
                                          verbose=True)
    elif args.dataset == "TVSeries":
        # BUGFIX: this branch previously referenced the loop index `i`
        # without defining it (stale leftover from the THUMOS branch),
        # raising NameError for a TVSeries-only run. Evaluate every step
        # size, mirroring the THUMOS branch (but ignoring only class 0).
        for i, steps in enumerate(args.step_size):
            print('Step size: ', steps)
            utl.compute_result_multilabel(args.dataset,
                                          args.class_index,
                                          enc_score_metrics[i],
                                          enc_target_metrics,
                                          save_dir,
                                          result_file,
                                          ignore_class=[0],
                                          save=True,
                                          verbose=True)
def main(args):
    """Evaluate the OAD encoder together with its 'delta' head.

    For every frame the encoder ("OAD") score and a delta score are
    collected; ``state_metrics`` fuses them by adding the current delta
    score to the previous frame's OAD score. THUMOS results are reported
    for the fused state, the one-frame-shifted OAD score, and the raw OAD
    score; TVSeries reports the raw OAD score only.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_score_metrics = []             # raw per-frame OAD scores
    delta_score_metrics = []           # delta scores (plus one pad per frame)
    state_metrics = []                 # OAD(t-1) + delta(t) fusion
    oad_time_metrics = []              # OAD score shifted by one frame
    enc_target_metrics = []            # ground-truth per-frame targets
    enc_variance_score_metrics = []    # currently unpopulated
    delta_variance_score_metrics = []  # currently unpopulated

    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint)
    else:
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)  # evaluation mode
    softmax = nn.Softmax(dim=1).to(device)

    # One-hot "background" (class 0) scores: THUMOS 22 classes, TVSeries 31.
    thumos_background_score = np.array(
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    tvseries_background_score = np.array([
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0
    ])

    for session_idx, session in enumerate(args.test_session_set, start=1):
        start = time.time()
        with torch.set_grad_enabled(False):
            camera_inputs = np.load(osp.join(args.data_root,
                                             args.camera_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            motion_inputs = np.load(osp.join(args.data_root,
                                             args.motion_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))

            # Separate recurrent states for the OAD and delta branches.
            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)
            d_enc_hx = to_device(torch.zeros(model.hidden_size), device)
            d_enc_cx = to_device(torch.zeros(model.hidden_size), device)
            dummy_score = to_device(torch.zeros(args.num_classes), device)
            # Running history of OAD scores; seeded with the dummy so that
            # oad_score[-2] is defined on the very first frame.
            oad_score = [dummy_score]

            for l in range(target.shape[0]):
                enc_target_metrics.append(target[l])
                # Padding entry; a real delta score is appended per frame
                # below as well, so this list is twice the target length.
                # It is not used in the reported results.
                delta_score_metrics.append(thumos_background_score)

                camera_input = to_device(
                    torch.as_tensor(camera_inputs[l].astype(np.float32)),
                    device)
                motion_input = to_device(
                    torch.as_tensor(motion_inputs[l].astype(np.float32)),
                    device)
                enc_hx, enc_cx, enc_score, enc_var = \
                    model.step(camera_input, motion_input, enc_hx, enc_cx,
                               d_enc_hx, d_enc_cx, dummy_score, delta=False)
                oad_score.append(enc_score)

                # The delta branch consumes the previous frame's features.
                # NOTE(review): at l == 0 this reads index -1, i.e. the LAST
                # frame of the session — confirm this wrap-around is intended.
                delta_camera_input = to_device(
                    torch.as_tensor(camera_inputs[l - 1].astype(np.float32)),
                    device)
                delta_motion_input = to_device(
                    torch.as_tensor(motion_inputs[l - 1].astype(np.float32)),
                    device)
                d_enc_hx, d_enc_cx, delta_score, delta_var = \
                    model.step(delta_camera_input, delta_motion_input,
                               enc_hx, enc_cx, d_enc_hx, d_enc_cx,
                               oad_score[-2], delta=True)
                delta_score_metrics.append(delta_score.cpu().numpy()[0])

                if args.dirichlet:
                    # Dirichlet head already produces normalized scores.
                    enc_score_metrics.append(enc_score.cpu().numpy()[0])
                else:
                    enc_score_metrics.append(
                        softmax(enc_score).cpu().numpy()[0])

                # Fused state: previous OAD score + current delta score;
                # the first frame falls back to the raw OAD score.
                if len(enc_score_metrics) > 1:
                    state = np.add(delta_score.cpu().numpy()[0],
                                   enc_score_metrics[-2])
                    oad_state = enc_score_metrics[-2]
                else:
                    state = enc_score.cpu().numpy()[0]
                    oad_state = enc_score.cpu().numpy()[0]
                state_metrics.append(state)
                oad_time_metrics.append(oad_state)
        end = time.time()

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, session_idx, len(args.test_session_set),
                end - start))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')

    # Compute result for encoder.
    if args.dataset == "THUMOS":
        print(len(state_metrics))
        print(len(enc_target_metrics))
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      state_metrics,
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=[0, 21],
                                      save=True,
                                      verbose=True)
        print('oad TIME mAP')
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      oad_time_metrics,
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=[0, 21],
                                      save=True,
                                      verbose=True)
        print('oad mAP')
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      enc_score_metrics,
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=[0, 21],
                                      save=True,
                                      verbose=True)
    elif args.dataset == "TVSeries":
        # BUGFIX: this branch was copy-pasted from the multi-step-size
        # script and indexed the FLAT `enc_score_metrics` list with a
        # per-step index `i`, passing a single frame's score vector where a
        # full score list is expected. Evaluate the flat score list once.
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      enc_score_metrics,
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=[0],
                                      save=True,
                                      verbose=True)
def main(args):
    """Train (and optionally debug-evaluate) the encoder-decoder model.

    Optionally resumes from ``args.checkpoint``, then for each epoch runs
    the phases in ``args.phases`` ('train' always; the non-train phase only
    when ``args.debug``), accumulates encoder/decoder cross-entropy losses,
    computes per-epoch mAP in debug mode, logs the epoch summary, and saves
    a checkpoint.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    save_dir = osp.join(this_dir, 'checkpoints')
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    # Record the exact command line used to launch this run in the log.
    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'log.txt'), command=command)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    model = build_model(args)
    if osp.isfile(args.checkpoint):
        # Resume: load weights on CPU first; moved to `device` below.
        checkpoint = torch.load(args.checkpoint,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        model = nn.DataParallel(model)
    model = model.to(device)

    # Class index 21 is excluded from the loss (ignored label).
    criterion = utl.MultiCrossEntropyLoss(ignore_index=21).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Override the checkpoint's learning rate with the requested one,
        # and shift the epoch counter past the already-trained epochs.
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']
    softmax = nn.Softmax(dim=1).to(device)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        # Hard-coded LR schedule: decay by 10x at (absolute) epoch 21.
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # Fresh data loaders every epoch, one per phase.
        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }

        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        enc_mAP = 0.0
        dec_losses = {phase: 0.0 for phase in args.phases}
        dec_score_metrics = []
        dec_target_metrics = []
        dec_mAP = 0.0

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                # The evaluation phase is only run in debug mode.
                model.train(False)
            else:
                continue

            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target,
                                dec_target) \
                        in enumerate(data_loaders[phase], start=1):
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    # Flatten targets to (batch*time, num_classes) to match
                    # the flattened score tensors produced by the model.
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    dec_target = dec_target.to(device).view(
                        -1, args.num_classes)

                    enc_score, dec_score = model(camera_inputs,
                                                 motion_inputs)

                    enc_loss = criterion(enc_score, enc_target)
                    dec_loss = criterion(dec_score, dec_target)
                    # Weight the running sums by batch size so a
                    # dataset-level average can be formed later.
                    enc_losses[phase] += enc_loss.item() * batch_size
                    dec_losses[phase] += dec_loss.item() * batch_size
                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} dec_loss: {:.5f}'
                            .format(epoch, batch_idx, enc_loss.item(),
                                    dec_loss.item()))

                    if training:
                        # Joint objective: encoder + decoder loss.
                        optimizer.zero_grad()
                        loss = enc_loss + dec_loss
                        loss.backward()
                        optimizer.step()
                    else:
                        # Prepare metrics for encoder
                        enc_score = softmax(enc_score).cpu().numpy()
                        enc_target = enc_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        # Prepare metrics for decoder
                        dec_score = softmax(dec_score).cpu().numpy()
                        dec_target = dec_target.cpu().numpy()
                        dec_score_metrics.extend(dec_score)
                        dec_target_metrics.extend(dec_target)
        end = time.time()

        if args.debug:
            result_file = 'inputs-{}-epoch-{}.json'.format(args.inputs, epoch)
            # Compute result for encoder
            enc_mAP = utl.compute_result_multilabel(
                args.class_index,
                enc_score_metrics,
                enc_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=True,
            )
            # Compute result for decoder
            dec_mAP = utl.compute_result_multilabel(
                args.class_index,
                dec_score_metrics,
                dec_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=False,
            )

        # Output result
        logger.output(epoch,
                      enc_losses,
                      dec_losses,
                      len(data_loaders['train'].dataset),
                      len(data_loaders['test'].dataset),
                      enc_mAP,
                      dec_mAP,
                      end - start,
                      debug=args.debug)

        # Save model
        checkpoint_file = 'inputs-{}-epoch-{}.pth'.format(args.inputs, epoch)
        torch.save(
            {
                'epoch':
                epoch,
                # Unwrap DataParallel so the checkpoint loads without it.
                'model_state_dict':
                model.module.state_dict()
                if args.distributed else model.state_dict(),
                'optimizer_state_dict':
                optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))
def main(args):
    """Run the encoder-decoder model over all test sessions and report mAP.

    Streams each session frame by frame through ``model.step``, collecting
    encoder scores per frame and decoder (anticipation) scores per future
    step, then computes multilabel results for the encoder and for each
    decoder step.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Per-frame encoder metrics, plus one metric list per decoder step.
    enc_score_metrics, enc_target_metrics = [], []
    dec_score_metrics = [[] for _ in range(args.dec_steps)]
    dec_target_metrics = [[] for _ in range(args.dec_steps)]

    if not osp.isfile(args.checkpoint):
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    checkpoint = torch.load(args.checkpoint)

    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)  # evaluation mode
    softmax = nn.Softmax(dim=1).to(device)

    for idx, session in enumerate(args.test_session_set, start=1):
        started_at = time.time()
        with torch.set_grad_enabled(False):
            camera_inputs = np.load(
                osp.join(args.data_root, args.camera_feature,
                         session + '.npy'),
                mmap_mode='r')
            motion_inputs = np.load(
                osp.join(args.data_root, args.motion_feature,
                         session + '.npy'),
                mmap_mode='r')
            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))

            # Zero-initialised recurrent state and future feature.
            future_input = to_device(torch.zeros(model.future_size), device)
            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)

            num_frames = target.shape[0]
            for t in range(num_frames):
                camera_input = to_device(
                    torch.as_tensor(camera_inputs[t].astype(np.float32)),
                    device)
                motion_input = to_device(
                    torch.as_tensor(motion_inputs[t].astype(np.float32)),
                    device)

                (future_input, enc_hx, enc_cx,
                 enc_score, dec_score_stack) = model.step(
                     camera_input, motion_input, future_input,
                     enc_hx, enc_cx)

                enc_score_metrics.append(
                    softmax(enc_score).cpu().numpy()[0])
                enc_target_metrics.append(target[t])

                # Decoder step k anticipates frame t+k; the target index is
                # clamped at the final frame of the session.
                for k in range(args.dec_steps):
                    dec_score_metrics[k].append(
                        softmax(dec_score_stack[k]).cpu().numpy()[0])
                    dec_target_metrics[k].append(
                        target[min(t + k, num_frames - 1)])
        elapsed = time.time() - started_at

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, idx, len(args.test_session_set), elapsed))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')

    # Compute result for encoder (saved to disk).
    utl.compute_result_multilabel(args.class_index,
                                  enc_score_metrics,
                                  enc_target_metrics,
                                  save_dir,
                                  result_file,
                                  ignore_class=[0, 21],
                                  save=True,
                                  verbose=True)

    # Compute result for decoder, one per anticipation step (not saved).
    for k in range(args.dec_steps):
        utl.compute_result_multilabel(args.class_index,
                                      dec_score_metrics[k],
                                      dec_target_metrics[k],
                                      save_dir,
                                      result_file,
                                      ignore_class=[0, 21],
                                      save=False,
                                      verbose=True)