Esempio n. 1
0
def main(args):
    """Evaluate a checkpoint on the test sessions, once per step size.

    For every session in ``args.test_session_set`` the camera features,
    motion features and per-frame targets are loaded from ``args.data_root``.
    For every entry of ``args.step_size`` the model is stepped over the
    session using the features found ``step`` frames before the current
    frame; the first ``step`` frames have no such input and receive a fixed
    background score instead. The collected scores are then handed, one step
    size at a time, to ``utl.compute_result_multilabel``.

    Args:
        args: Parsed options. This function reads ``gpu``, ``step_size``,
            ``checkpoint``, ``test_session_set``, ``data_root``,
            ``camera_feature``, ``motion_feature``, ``dataset``,
            ``dirichlet`` and ``class_index``; the whole object is also
            passed to ``build_model``.

    Raises:
        RuntimeError: If ``args.checkpoint`` is not an existing file.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # One list of per-frame scores for each step size; the targets are the
    # same for all step sizes, so they are collected only once.
    enc_score_metrics = [[] for _ in args.step_size]
    enc_target_metrics = []

    if not osp.isfile(args.checkpoint):
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    # Deserialize on the CPU so that a checkpoint saved from a GPU run can
    # still be restored when this script falls back to the CPU.
    checkpoint = torch.load(args.checkpoint,
                            map_location=torch.device('cpu'))

    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)

    softmax = nn.Softmax(dim=1).to(device)

    thumos_background_score = np.array(
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    tvseries_background_score = np.array([
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0
    ])

    # Score emitted for frames that have no input ``step`` frames earlier.
    # ``None`` for any other dataset name: such frames then get no score,
    # exactly as before.
    background_score = {
        'THUMOS': thumos_background_score,
        'TVSeries': tvseries_background_score,
    }.get(args.dataset)

    for session_idx, session in enumerate(args.test_session_set, start=1):
        start = time.time()
        with torch.no_grad():
            camera_inputs = np.load(osp.join(args.data_root,
                                             args.camera_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            motion_inputs = np.load(osp.join(args.data_root,
                                             args.motion_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))
            num_frames = target.shape[0]

            # NOTE(review): the recurrent state is initialised once per
            # session and is therefore carried over from one step size to
            # the next -- confirm this is intended rather than a reset per
            # step size.
            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)

            # Iterating the array yields one target row per frame.
            enc_target_metrics.extend(target)

            for i, steps in enumerate(args.step_size):
                step = int(steps)

                for frame in range(num_frames):
                    if frame < step:
                        # No feature ``step`` frames back yet.
                        if background_score is not None:
                            enc_score_metrics[i].append(background_score)
                        continue

                    camera_input = to_device(
                        torch.as_tensor(camera_inputs[frame - step].astype(
                            np.float32)), device)
                    motion_input = to_device(
                        torch.as_tensor(motion_inputs[frame - step].astype(
                            np.float32)), device)

                    enc_hx, enc_cx, enc_score = model.step(
                        camera_input, motion_input, enc_hx, enc_cx, step)

                    # With ``args.dirichlet`` the raw model output is kept;
                    # otherwise it is normalised with a softmax first.
                    if not args.dirichlet:
                        enc_score = softmax(enc_score)
                    enc_score_metrics[i].append(enc_score.cpu().numpy()[0])

        end = time.time()

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, session_idx, len(args.test_session_set), end - start))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')

    # Compute result for encoder
    if args.dataset == "THUMOS":
        ignore_class = [0, 21]
    elif args.dataset == "TVSeries":
        ignore_class = [0]
    else:
        # Any other dataset name: nothing is evaluated (unchanged behaviour).
        return

    # Both datasets are evaluated once per step size. Previously the TVSeries
    # branch reused the loop index left over from the session loop and so
    # only evaluated the last step size.
    # NOTE(review): every step size is given the same save_dir/result_file
    # with save=True; if the helper overwrites the file, only the last step
    # size's result remains on disk -- confirm.
    for i, steps in enumerate(args.step_size):
        print('Step size:   ', steps)
        print(len(enc_score_metrics[i]))
        print(len(enc_target_metrics))
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      enc_score_metrics[i],
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=ignore_class,
                                      save=True,
                                      verbose=True)
Esempio n. 2
0
def main(args):
    """Evaluate a checkpoint frame by frame with an extra "delta" model step.

    For each session in ``args.test_session_set`` every frame goes through
    ``model.step`` twice: once with the current frame's features
    (``delta=False``) and once with the preceding frame's features together
    with the preceding frame's score (``delta=True``). Three per-frame score
    streams are collected and evaluated against the targets:

    * ``enc_score_metrics``: the current-frame score (softmax-normalised
      unless ``args.dirichlet`` is set);
    * ``oad_time_metrics``: the score stored for the preceding frame;
    * ``state_metrics``: that preceding stored score plus the raw delta
      score.

    For the very first frame processed, the last two fall back to the raw
    current-frame score.

    Args:
        args: Parsed options. This function reads ``gpu``, ``checkpoint``,
            ``test_session_set``, ``data_root``, ``camera_feature``,
            ``motion_feature``, ``num_classes``, ``dirichlet``, ``dataset``
            and ``class_index``; the whole object is also passed to
            ``build_model``.

    Raises:
        RuntimeError: If ``args.checkpoint`` is not an existing file.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_score_metrics = []
    state_metrics = []
    oad_time_metrics = []
    enc_target_metrics = []

    if not osp.isfile(args.checkpoint):
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    # Deserialize on the CPU so that a checkpoint saved from a GPU run can
    # still be restored when this script falls back to the CPU.
    checkpoint = torch.load(args.checkpoint,
                            map_location=torch.device('cpu'))

    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)

    softmax = nn.Softmax(dim=1).to(device)

    for session_idx, session in enumerate(args.test_session_set, start=1):

        start = time.time()

        with torch.no_grad():
            camera_inputs = np.load(osp.join(args.data_root,
                                             args.camera_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            motion_inputs = np.load(osp.join(args.data_root,
                                             args.motion_feature,
                                             session + '.npy'),
                                    mmap_mode='r')

            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))

            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)
            d_enc_hx = to_device(torch.zeros(model.hidden_size), device)
            d_enc_cx = to_device(torch.zeros(model.hidden_size), device)

            dummy_score = to_device(torch.zeros(args.num_classes), device)
            # Score of the preceding frame, fed to the delta step. Only the
            # latest one is ever read, so a single variable replaces the
            # former per-session list that kept every score tensor alive.
            prev_score = dummy_score

            for frame in range(target.shape[0]):
                enc_target_metrics.append(target[frame])

                camera_input = to_device(
                    torch.as_tensor(camera_inputs[frame].astype(np.float32)),
                    device)
                motion_input = to_device(
                    torch.as_tensor(motion_inputs[frame].astype(np.float32)),
                    device)

                # The variance outputs of both steps are currently unused; a
                # disabled earlier experiment combined the two estimates with
                # Kalman-filter style inverse-variance coefficients.
                enc_hx, enc_cx, enc_score, _enc_var = model.step(
                    camera_input, motion_input, enc_hx, enc_cx,
                    d_enc_hx, d_enc_cx, dummy_score, delta=False)

                # NOTE(review): for frame 0 the index ``frame - 1`` is -1,
                # i.e. the LAST frame of the session is used as the
                # "previous" frame -- confirm whether that wrap-around is
                # intended.
                delta_camera_input = to_device(
                    torch.as_tensor(
                        camera_inputs[frame - 1].astype(np.float32)),
                    device)
                delta_motion_input = to_device(
                    torch.as_tensor(
                        motion_inputs[frame - 1].astype(np.float32)),
                    device)

                d_enc_hx, d_enc_cx, delta_score, _delta_var = model.step(
                    delta_camera_input, delta_motion_input, enc_hx, enc_cx,
                    d_enc_hx, d_enc_cx, prev_score, delta=True)
                prev_score = enc_score

                if args.dirichlet:
                    enc_score_metrics.append(enc_score.cpu().numpy()[0])
                else:
                    enc_score_metrics.append(
                        softmax(enc_score).cpu().numpy()[0])

                # NOTE(review): ``enc_score_metrics`` spans all sessions, so
                # at the first frame of every session after the first one,
                # ``[-2]`` is the last frame of the PREVIOUS session --
                # confirm whether a per-session reset is wanted.
                if len(enc_score_metrics) > 1:
                    oad_state = enc_score_metrics[-2]
                    state = np.add(delta_score.cpu().numpy()[0], oad_state)
                else:
                    # Very first frame overall: no stored previous score, so
                    # use the raw (not softmax-normalised) current score.
                    state = oad_state = enc_score.cpu().numpy()[0]

                state_metrics.append(state)
                oad_time_metrics.append(oad_state)

        end = time.time()

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, session_idx, len(args.test_session_set), end - start))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')

    def evaluate(scores, ignore_class):
        # Shared call into the project's metric helper.
        # NOTE(review): every call passes the same save_dir/result_file with
        # save=True; if the helper overwrites the file, only the last
        # evaluation remains on disk -- confirm.
        utl.compute_result_multilabel(args.dataset,
                                      args.class_index,
                                      scores,
                                      enc_target_metrics,
                                      save_dir,
                                      result_file,
                                      ignore_class=ignore_class,
                                      save=True,
                                      verbose=True)

    # Compute result for encoder
    if args.dataset == "THUMOS":
        print(len(state_metrics))
        print(len(enc_target_metrics))
        evaluate(state_metrics, [0, 21])

        print('oad TIME mAP')
        evaluate(oad_time_metrics, [0, 21])

        print('oad mAP')
        evaluate(enc_score_metrics, [0, 21])

    elif args.dataset == "TVSeries":
        # ``enc_score_metrics`` is a flat per-frame list in this script, so
        # it is evaluated as a whole. The previous code looped over
        # ``args.step_size`` and passed ``enc_score_metrics[i]`` -- a single
        # frame's score vector -- as if it were the full score list.
        evaluate(enc_score_metrics, [0])
Esempio n. 3
0
def main(args):
    """Train the encoder/decoder model and write a checkpoint every epoch.

    Sets up logging, the device and the random seed, builds the model
    (restoring model and optimizer state from ``args.checkpoint`` when that
    file exists, otherwise applying ``utl.weights_init``), and then runs
    ``args.epochs`` epochs starting at ``args.start_epoch``. The 'train'
    phase always runs; any other phase only runs when ``args.debug`` is set,
    in which case its softmax scores are collected and evaluated with
    ``utl.compute_result_multilabel``. After each epoch the results are
    logged and a checkpoint is saved into a ``checkpoints`` directory next
    to this file.

    Args:
        args: Parsed options. This function reads ``gpu``, ``seed``,
            ``checkpoint``, ``distributed``, ``lr``, ``weight_decay``,
            ``start_epoch``, ``epochs``, ``phases``, ``num_classes``,
            ``verbose``, ``debug``, ``inputs`` and ``class_index``, and
            updates ``lr`` and ``start_epoch`` in place.
    """
    base_dir = osp.join(osp.dirname(__file__), '.')
    save_dir = osp.join(base_dir, 'checkpoints')
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    logger = utl.setup_logger(osp.join(base_dir, 'log.txt'),
                              command='python ' + ' '.join(sys.argv))
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    # Either resume from the checkpoint file or start from fresh weights.
    model = build_model(args)
    resumed = None
    if osp.isfile(args.checkpoint):
        resumed = torch.load(args.checkpoint,
                             map_location=torch.device('cpu'))
        model.load_state_dict(resumed['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        model = nn.DataParallel(model)
    model = model.to(device)

    criterion = utl.MultiCrossEntropyLoss(ignore_index=21).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if resumed is not None:
        optimizer.load_state_dict(resumed['optimizer_state_dict'])
        # The restored optimizer state carries its own learning rate; the
        # command-line value takes precedence.
        for group in optimizer.param_groups:
            group['lr'] = args.lr
        args.start_epoch += resumed['epoch']
    softmax = nn.Softmax(dim=1).to(device)

    first_epoch = args.start_epoch
    for epoch in range(first_epoch, first_epoch + args.epochs):
        # Single learning-rate drop by a factor of ten at epoch 21.
        if epoch == 21:
            args.lr *= 0.1
            for group in optimizer.param_groups:
                group['lr'] = args.lr

        data_loaders = {}
        for phase in args.phases:
            data_loaders[phase] = utl.build_data_loader(args, phase)

        enc_losses = dict.fromkeys(args.phases, 0.0)
        dec_losses = dict.fromkeys(args.phases, 0.0)
        enc_score_metrics, enc_target_metrics = [], []
        dec_score_metrics, dec_target_metrics = [], []
        enc_mAP = 0.0
        dec_mAP = 0.0

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            # Non-training phases are only run in debug mode.
            if not training and not args.debug:
                continue
            model.train(training)

            with torch.set_grad_enabled(training):
                for batch_idx, batch in enumerate(data_loaders[phase],
                                                  start=1):
                    camera_inputs, motion_inputs, enc_target, dec_target = \
                        batch
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    # Flatten the targets to rows of ``num_classes`` entries.
                    enc_target = enc_target.to(device).view(
                        -1, args.num_classes)
                    dec_target = dec_target.to(device).view(
                        -1, args.num_classes)

                    enc_score, dec_score = model(camera_inputs, motion_inputs)
                    enc_loss = criterion(enc_score, enc_target)
                    dec_loss = criterion(dec_score, dec_target)
                    enc_loss_value = enc_loss.item()
                    dec_loss_value = dec_loss.item()
                    enc_losses[phase] += enc_loss_value * batch_size
                    dec_losses[phase] += dec_loss_value * batch_size
                    if args.verbose:
                        print(
                            'Epoch: {:2} | iteration: {:3} | enc_loss: {:.5f} dec_loss: {:.5f}'
                            .format(epoch, batch_idx, enc_loss_value,
                                    dec_loss_value))

                    if not training:
                        # Collect encoder and decoder predictions for the
                        # end-of-epoch evaluation.
                        enc_score_metrics.extend(
                            softmax(enc_score).cpu().numpy())
                        enc_target_metrics.extend(enc_target.cpu().numpy())
                        dec_score_metrics.extend(
                            softmax(dec_score).cpu().numpy())
                        dec_target_metrics.extend(dec_target.cpu().numpy())
                        continue

                    # Training phase: one optimizer step on the summed loss.
                    optimizer.zero_grad()
                    (enc_loss + dec_loss).backward()
                    optimizer.step()
        end = time.time()

        if args.debug:
            result_file = 'inputs-{}-epoch-{}.json'.format(args.inputs, epoch)
            # Encoder result (saved to file).
            enc_mAP = utl.compute_result_multilabel(
                args.class_index,
                enc_score_metrics,
                enc_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=True,
            )
            # Decoder result (not saved).
            dec_mAP = utl.compute_result_multilabel(
                args.class_index,
                dec_score_metrics,
                dec_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=False,
            )

        # Report the epoch.
        logger.output(epoch,
                      enc_losses,
                      dec_losses,
                      len(data_loaders['train'].dataset),
                      len(data_loaders['test'].dataset),
                      enc_mAP,
                      dec_mAP,
                      end - start,
                      debug=args.debug)

        # Persist the epoch; when wrapped in DataParallel, save the wrapped
        # module's weights rather than the wrapper's.
        checkpoint_file = 'inputs-{}-epoch-{}.pth'.format(args.inputs, epoch)
        bare_model = model.module if args.distributed else model
        snapshot = {
            'epoch': epoch,
            'model_state_dict': bare_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }
        torch.save(snapshot, osp.join(save_dir, checkpoint_file))
Esempio n. 4
0
def main(args):
    """Evaluate a checkpoint's encoder and decoder scores on the test sessions.

    For every session in ``args.test_session_set`` the camera features,
    motion features and per-frame targets are loaded from ``args.data_root``
    and the model is stepped over the session one frame at a time. The
    softmax of the encoder score is collected per frame, and the softmax of
    each of the ``args.dec_steps`` decoder scores is collected per decoder
    step. Results are computed with ``utl.compute_result_multilabel``; only
    the encoder call asks for the result to be saved.

    Args:
        args: Parsed options. This function reads ``gpu``, ``dec_steps``,
            ``checkpoint``, ``test_session_set``, ``data_root``,
            ``camera_feature``, ``motion_feature`` and ``class_index``; the
            whole object is also passed to ``build_model``.

    Raises:
        RuntimeError: If ``args.checkpoint`` is not an existing file.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_score_metrics = []
    enc_target_metrics = []
    # One score list and one target list per decoder step.
    dec_score_metrics = [[] for _ in range(args.dec_steps)]
    dec_target_metrics = [[] for _ in range(args.dec_steps)]

    if not osp.isfile(args.checkpoint):
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    # Deserialize on the CPU so that a checkpoint saved from a GPU run can
    # still be restored when this script falls back to the CPU.
    checkpoint = torch.load(args.checkpoint,
                            map_location=torch.device('cpu'))

    model = build_model(args).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.train(False)

    softmax = nn.Softmax(dim=1).to(device)

    for session_idx, session in enumerate(args.test_session_set, start=1):
        start = time.time()
        with torch.no_grad():
            camera_inputs = np.load(osp.join(args.data_root,
                                             args.camera_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            motion_inputs = np.load(osp.join(args.data_root,
                                             args.motion_feature,
                                             session + '.npy'),
                                    mmap_mode='r')
            target = np.load(
                osp.join(args.data_root, 'target', session + '.npy'))
            num_frames = target.shape[0]

            # Fresh recurrent state and future input for every session.
            future_input = to_device(torch.zeros(model.future_size), device)
            enc_hx = to_device(torch.zeros(model.hidden_size), device)
            enc_cx = to_device(torch.zeros(model.hidden_size), device)

            for frame in range(num_frames):
                camera_input = to_device(
                    torch.as_tensor(camera_inputs[frame].astype(np.float32)),
                    device)
                motion_input = to_device(
                    torch.as_tensor(motion_inputs[frame].astype(np.float32)),
                    device)

                future_input, enc_hx, enc_cx, enc_score, dec_score_stack = \
                    model.step(camera_input, motion_input, future_input,
                               enc_hx, enc_cx)

                enc_score_metrics.append(softmax(enc_score).cpu().numpy()[0])
                enc_target_metrics.append(target[frame])

                for step in range(args.dec_steps):
                    dec_score_metrics[step].append(
                        softmax(dec_score_stack[step]).cpu().numpy()[0])
                    # Decoder step ``step`` is paired with the target
                    # ``step`` frames ahead, clamped to the last frame of
                    # the session.
                    dec_target_metrics[step].append(
                        target[min(frame + step, num_frames - 1)])
        end = time.time()

        print(
            'Processed session {}, {:2} of {}, running time {:.2f} sec'.format(
                session, session_idx, len(args.test_session_set), end - start))

    save_dir = osp.dirname(args.checkpoint)
    result_file = osp.basename(args.checkpoint).replace('.pth', '.json')
    # Compute result for encoder
    utl.compute_result_multilabel(args.class_index,
                                  enc_score_metrics,
                                  enc_target_metrics,
                                  save_dir,
                                  result_file,
                                  ignore_class=[0, 21],
                                  save=True,
                                  verbose=True)

    # Compute result for decoder
    for step in range(args.dec_steps):
        utl.compute_result_multilabel(args.class_index,
                                      dec_score_metrics[step],
                                      dec_target_metrics[step],
                                      save_dir,
                                      result_file,
                                      ignore_class=[0, 21],
                                      save=False,
                                      verbose=True)