import time

import torch
import torch.nn as nn
import torch.nn.functional as F

# AVE_Fully_Dataset, AVE_Weakly_Dataset, compute_acc, AVPSLoss, `device`, and
# `model_name` are defined in the project's dataset / utility modules.


# Fully supervised setting (AVE_Fully_Dataset): evaluation on the test split.
def test(args, net_model, model_path=None):
    if model_path is not None:
        net_model = torch.load(model_path)
        print(">>> [Testing] Load pretrained model from " + model_path)
    net_model.eval()

    AVEData = AVE_Fully_Dataset(video_dir=args.dir_video,
                                audio_dir=args.dir_audio,
                                label_dir=args.dir_labels,
                                order_dir=args.dir_order_test,
                                batch_size=402,
                                status='test')
    nb_batch = AVEData.__len__()
    # The whole test split is loaded as a single batch of 402 samples.
    audio_inputs, video_inputs, labels, _, _ = AVEData.get_batch(0)
    audio_inputs = audio_inputs.to(device)
    video_inputs = video_inputs.to(device)
    labels = labels.to(device)

    fusion, out_prob, cross_att = net_model(audio_inputs, video_inputs, args.threshold)

    labels = labels.cpu().data.numpy()
    x_labels = out_prob.cpu().data.numpy()
    acc = compute_acc(labels, x_labels, nb_batch)
    print('[test]acc: ', acc)
    return acc
# Weakly supervised setting (AVE_Weakly_Dataset): evaluation on the test split.
def test(args, net_model, model_path=None):
    if model_path is not None:
        net_model = torch.load(model_path)
        print(">>> [Testing] Load pretrained model from " + model_path)
    net_model.eval()

    AVEData = AVE_Weakly_Dataset(video_dir=args.dir_video,
                                 video_dir_bg=args.dir_video_bg,
                                 audio_dir=args.dir_audio,
                                 audio_dir_bg=args.dir_audio_bg,
                                 label_dir=args.dir_labels,
                                 prob_label_dir=args.prob_dir_labels,
                                 label_dir_bg=args.dir_labels_bg,
                                 label_dir_gt=args.dir_labels_gt,
                                 order_dir=args.dir_order_test,
                                 batch_size=402,
                                 status="test")
    nb_batch = AVEData.__len__()
    audio_inputs, video_inputs, labels = AVEData.get_batch(0)
    audio_inputs = audio_inputs.cuda()
    video_inputs = video_inputs.cuda()
    labels = labels.numpy()

    scores_avg, x_labels = net_model(audio_inputs, video_inputs, args.threshold)
    # x_labels: [bs, 10, 29]
    x_labels = F.softmax(x_labels, dim=-1)
    x_labels = x_labels.cpu().data.numpy()
    acc = compute_acc(labels, x_labels, nb_batch)
    return acc
# Weakly supervised setting (AVE_Weakly_Dataset): evaluation on the validation split.
def val(args, net_model):
    net_model.eval()

    AVEData = AVE_Weakly_Dataset(video_dir=args.dir_video,
                                 video_dir_bg=args.dir_video_bg,
                                 audio_dir=args.dir_audio,
                                 audio_dir_bg=args.dir_audio_bg,
                                 label_dir=args.dir_labels,
                                 prob_label_dir=args.prob_dir_labels,
                                 label_dir_bg=args.dir_labels_bg,
                                 label_dir_gt=args.dir_labels_gt,
                                 order_dir=args.dir_order_val,
                                 batch_size=402,
                                 status="val")
    nb_batch = AVEData.__len__()
    audio_inputs, video_inputs, labels = AVEData.get_batch(0)  # labels: [bs, 10, 29]
    audio_inputs = audio_inputs.cuda()
    video_inputs = video_inputs.cuda()
    labels = labels.numpy()

    scores_avg, x_labels = net_model(audio_inputs, video_inputs, args.threshold)
    # x_labels: [bs, 10, 29]
    x_labels = F.softmax(x_labels, dim=-1)
    x_labels = x_labels.cpu().data.numpy()
    acc = compute_acc(labels, x_labels, nb_batch)
    print('val accuracy', acc)
    return acc
# Fully supervised setting (AVE_Fully_Dataset): evaluation on the validation split.
def val(args, net_model):
    net_model.eval()

    AVEData = AVE_Fully_Dataset(video_dir=args.dir_video,
                                audio_dir=args.dir_audio,
                                label_dir=args.dir_labels,
                                order_dir=args.dir_order_val,
                                batch_size=402,
                                status='val')
    nb_batch = AVEData.__len__()
    audio_inputs, video_inputs, labels, _, _ = AVEData.get_batch(0)
    audio_inputs = audio_inputs.to(device)
    video_inputs = video_inputs.to(device)
    labels = labels.to(device)

    fusion, out_prob, cross_att = net_model(audio_inputs, video_inputs, args.threshold)

    labels = labels.cpu().data.numpy()
    x_labels = out_prob.cpu().data.numpy()
    acc = compute_acc(labels, x_labels, nb_batch)
    print('[val]acc: ', acc)
    return acc
# Fully supervised setting (AVE_Fully_Dataset): training loop.
def train(args, net_model, optimizer):
    AVEData = AVE_Fully_Dataset(video_dir=args.dir_video,
                                audio_dir=args.dir_audio,
                                label_dir=args.dir_labels,
                                order_dir=args.dir_order_train,
                                batch_size=args.batch_size,
                                status='train')
    nb_batch = AVEData.__len__() // args.batch_size
    print('nb_batch:', nb_batch)

    epoch_l = []
    best_val_acc = 0
    best_test_acc = 0
    best_epoch = 0

    for epoch in range(args.nb_epoch):
        net_model.train()
        epoch_loss = 0
        epoch_loss_cls = 0
        epoch_loss_avps = 0
        n = 0
        start = time.time()

        SHUFFLE_SAMPLES = True
        for i in range(nb_batch):
            audio_inputs, video_inputs, labels, segment_label_batch, segment_avps_gt_batch = \
                AVEData.get_batch(i, SHUFFLE_SAMPLES)
            SHUFFLE_SAMPLES = False  # shuffle only once per epoch, on the first batch

            audio_inputs = audio_inputs.to(device)
            video_inputs = video_inputs.to(device)
            labels = labels.to(device)
            segment_label_batch = segment_label_batch.to(device)
            segment_avps_gt_batch = segment_avps_gt_batch.to(device)

            net_model.zero_grad()
            fusion, out_prob, cross_att = net_model(audio_inputs, video_inputs, args.threshold)
            # out_prob: [bs, 10, 29], segment_label_batch: [bs, 10]
            loss_cls = nn.CrossEntropyLoss()(out_prob.permute(0, 2, 1), segment_label_batch)
            loss_avps = AVPSLoss(cross_att, segment_avps_gt_batch)
            loss = loss_cls + args.LAMBDA * loss_avps

            epoch_loss += loss.cpu().data.numpy()
            epoch_loss_cls += loss_cls.cpu().data.numpy()
            epoch_loss_avps += loss_avps.cpu().data.numpy()
            loss.backward()
            optimizer.step_lr()
            n = n + 1
        SHUFFLE_SAMPLES = True

        if (epoch + 1) % 60 == 0 and epoch < 170:
            optimizer.update_lr()

        end = time.time()
        epoch_l.append(epoch_loss)

        # Training accuracy is computed on the last batch of the epoch.
        labels = labels.cpu().data.numpy()
        x_labels = out_prob.cpu().data.numpy()
        acc = compute_acc(labels, x_labels, nb_batch)
        print("=== Epoch {%s} lr: {%.6f} | Loss: [{%.4f}] | loss_cls: [{%.4f}] | loss_avps: [{%.4f}] | training_acc {%.4f}"
              % (str(epoch), optimizer._optimizer.param_groups[0]['lr'],
                 epoch_loss / n, epoch_loss_cls / n, epoch_loss_avps / n, acc))

        if epoch % args.save_epoch == 0 and epoch != 0:
            val_acc = val(args, net_model)
            print('val accuracy:', val_acc, 'epoch=', epoch)
            if val_acc >= best_val_acc:
                best_val_acc = val_acc
                print('best val accuracy: {} ***************************************'.format(best_val_acc))
                # torch.save(net_model, model_name + "_" + str(epoch) + "_fully.pt")

        if epoch % args.check_epoch == 0 and epoch != 0:
            test_acc = test(args, net_model)
            print('test accuracy:', test_acc, 'epoch=', epoch)
            if test_acc >= best_test_acc:
                best_test_acc = test_acc
                best_epoch = epoch
                print('best test accuracy: {} ======================================='.format(best_test_acc))
                torch.save(net_model, model_name + "_" + str(epoch) + "_fully.pt")

    print('[best val accuracy]: ', best_val_acc)
    print('[best test accuracy]: ', best_test_acc)
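
# ------------------------------------------------------------------------------
# Hypothetical entry point (a minimal sketch, not part of the original script),
# showing how train()/test() above could be wired together. The flag names
# mirror the attributes read from `args` in the functions above; the default
# values are illustrative only, and build_model() / build_optimizer() are
# assumed placeholders for the project's own model constructor and optimizer
# wrapper (which must expose step_lr(), update_lr(), and `_optimizer`).
import argparse


def main():
    parser = argparse.ArgumentParser(description='AVE localization, fully supervised setting')
    # Feature / label / sample-order paths consumed by AVE_Fully_Dataset.
    parser.add_argument('--dir_video', type=str, required=True)
    parser.add_argument('--dir_audio', type=str, required=True)
    parser.add_argument('--dir_labels', type=str, required=True)
    parser.add_argument('--dir_order_train', type=str, required=True)
    parser.add_argument('--dir_order_val', type=str, required=True)
    parser.add_argument('--dir_order_test', type=str, required=True)
    # Hyper-parameters read in train(); defaults are illustrative, not the original settings.
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--nb_epoch', type=int, default=300)
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--LAMBDA', type=float, default=1.0)
    parser.add_argument('--save_epoch', type=int, default=5)
    parser.add_argument('--check_epoch', type=int, default=5)
    parser.add_argument('--trained_model_path', type=str, default=None)
    parser.add_argument('--train', action='store_true', help='train; otherwise evaluate only')
    args = parser.parse_args()

    net_model = build_model(args).to(device)      # placeholder: project-specific model
    optimizer = build_optimizer(net_model, args)  # placeholder: wrapper exposing step_lr()/update_lr()

    if args.train:
        train(args, net_model, optimizer)
    else:
        test(args, net_model, model_path=args.trained_model_path)


if __name__ == '__main__':
    main()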