Example #1
def runner_func(dataset, state_dict, gpu_id, index_queue, result_queue):
    torch.cuda.set_device(gpu_id)
    net = BinaryClassifier(num_class, 5,
                           args.modality, test_mode=True, new_length=data_length,
                           base_model=args.arch)

    net.load_state_dict(state_dict)
    net.prepare_test_fc()
    net.eval()
    net.cuda()
    output_dim = net.test_fc.out_features
    while True:
        index = index_queue.get()
        frames_gen, frame_cnt = dataset[index]
        num_crop = args.test_crops
        length = 3
        if args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18

        output = torch.zeros((frame_cnt, num_crop, output_dim)).cuda()
        cnt = 0
        for frames in frames_gen:
            # Variable(..., volatile=True) is deprecated; torch.no_grad() is the modern equivalent
            with torch.no_grad():
                input_var = frames.view(-1, length, frames.size(-2), frames.size(-1)).cuda()
                rst, _ = net(input_var, None)
            sc = rst.view(-1, num_crop, output_dim)
            output[cnt:cnt + sc.size(0), :, :] = sc
            cnt += sc.size(0)
        if hasattr(dataset, 'video_list'):
            result_queue.put((dataset.video_list[index].id.split('/')[-1], output.cpu().numpy()))
        elif hasattr(dataset, 'video_dict'):
            # dict.keys() is not subscriptable in Python 3; materialize it as a list first
            result_queue.put((list(dataset.video_dict.keys())[index].split('/')[-1], output.cpu().numpy()))
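
A worker like runner_func above is typically driven from the parent process through the two queues, as Examples #4 and #5 hint with their spawn contexts. The sketch below is hypothetical and not part of the original code: the gpu_list variable and the way dataset and state_dict are built are assumptions.

import multiprocessing

if __name__ == '__main__':
    ctx = multiprocessing.get_context('spawn')  # CUDA in subprocesses requires the spawn start method
    index_queue = ctx.Queue()
    result_queue = ctx.Queue()

    # one worker per GPU; gpu_list is an assumed list such as [0, 1, 2, 3]
    workers = [ctx.Process(target=runner_func,
                           args=(dataset, state_dict, gpu_id, index_queue, result_queue))
               for gpu_id in gpu_list]
    for w in workers:
        w.daemon = True  # runner_func loops forever, so let the workers die with the parent
        w.start()

    for i in range(len(dataset)):  # enqueue every video index
        index_queue.put(i)

    results = {}
    for _ in range(len(dataset)):  # collect exactly one result per index
        video_id, scores = result_queue.get()
        results[video_id] = scores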
Example #2
def runner_func(dataset, state_dict, gpu_id, index_queue, result_queue):
    torch.cuda.set_device(gpu_id)
    net = BinaryClassifier(num_class,
                           args.num_body_segments,
                           args,
                           dropout=args.dropout,
                           test_mode=True)
    # net = torch.nn.DataParallel(net, device_ids=[gpu_id])

    net.load_state_dict(state_dict)
    net.eval()
    net.cuda()
    while True:
        index = index_queue.get()
        feature, feature_mask, num_feat, pos_ind, video_id, _ = dataset[index]
        feature = feature.cuda()
        feature_mask = feature_mask.cuda()
        pos_ind = pos_ind.cuda()
        with torch.no_grad():
            rois, actness, roi_scores = net(feature,
                                            pos_ind,
                                            feature_mask=feature_mask,
                                            test_mode=True)
            rois = rois[0].cpu().numpy()
            actness = actness[0].cpu().numpy()
            roi_scores = roi_scores[0].cpu().numpy()[:, 1]
            outputs = [rois, actness, roi_scores, num_feat]

        result_queue.put(
            (dataset.video_list[index].id.split('/')[-1], outputs))
Example #3
def main():
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs["sampling"]
    num_class = dataset_configs["num_class"]
    # dropout is hard-coded here, overriding whatever the argument parser produced
    args.dropout = 0.8
    if args.modality == "RGB":
        data_length = 1
    elif args.modality in ["Flow", "RGBDiff"]:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    model = BinaryClassifier(
        num_class,
        args.num_body_segments,
        args.modality,
        new_length=data_length,
        base_model=args.arch,
        dropout=args.dropout,
        bn_mode=args.bn_mode,
    )

    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weights from '{}'".format(
                args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd["state_dict"])
            print(
                ("=> loaded pretrained weights from '{}'".format(args.init_weights)))
        else:
            print(
                ("=> no weights file found at '{}'".format(args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs["kinetics_pretrain"][args.arch][
            args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)["state_dict"])
        print(("=> loaded init weights from '{}'".format(model_url)))
    else:
        # standard ImageNet pretraining
        if args.modality == "Flow":
            model_url = dataset_configs["flow_init"][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)["state_dict"])
            print(("=> loaded flow init weights from '{}'".format(model_url)))

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    pin_memory = args.modality == "RGB"

    # Data loading code
    if args.modality != "RGBDiff":
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["train_list"])
    val_prop_file = "data/{}_proposal_list.txt".format(
        dataset_configs["test_list"])
    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            train_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(
            "",
            val_prop_file,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            fg_ratio=6,
            bg_ratio=6,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ["BNInception", "InceptionV3"])),
                ToTorchFormatTensor(
                    div=(args.arch not in ["BNInception", "InceptionV3"])),
                normalize,
            ]),
        ),
        batch_size=4,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
    )

    binary_criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(("group: {} has {} params, lr_mult: {}, decay_mult: {}".format(
            group["name"],
            len(group["params"]),
            group["lr_mult"],
            group["decay_mult"],
        )))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, binary_criterion, optimizer, epoch)

        # evaluate on validation list
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, binary_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": args.arch,
                    "state_dict": model.state_dict(),
                    "best_loss": best_loss,
                },
                is_best,
            )
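
The loop above calls adjust_learning_rate, which is not shown in these examples. Below is a minimal sketch of the usual TSN-style step decay, honoring the lr_mult/decay_mult fields printed for each policy group, under the assumption that the learning rate is divided by 10 at every milestone in lr_steps.

import numpy as np

def adjust_learning_rate(optimizer, epoch, lr_steps):
    # decay the base lr by 10x for every milestone the epoch has already passed
    decay = 0.1 ** sum(epoch >= np.array(lr_steps))
    lr = args.lr * decay
    wd = args.weight_decay
    for param_group in optimizer.param_groups:
        # per-group scaling, as set up by get_optim_policies()
        param_group['lr'] = lr * param_group['lr_mult']
        param_group['weight_decay'] = wd * param_group['decay_mult']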
Example #4
                volatile=True,
            )
            # NOTE: the Variable(..., volatile=True) wrapper above is deprecated;
            # modern PyTorch wraps inference in torch.no_grad() instead
            rst, _ = net(input_var, None)
            sc = rst.data.view(-1, num_crop, output_dim)
            output[cnt : cnt + sc.size(0), :, :] = sc
            cnt += sc.size(0)

        result_queue.put(
            (dataset.video_list[index].id.split("/")[-1], output.cpu().numpy())
        )


if __name__ == "__main__":

    ctx = multiprocessing.get_context("spawn")
    net = BinaryClassifier(num_class, 5, args.modality, base_model=args.arch)

    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose(
            # scale to scale_size, then center-crop to input_size
            # (applying GroupScale twice here looks like a typo for GroupCenterCrop)
            [GroupScale(net.scale_size), GroupCenterCrop(net.input_size)]
        )
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)]
        )
    else:
        raise ValueError(
            "only 1 and 10 crops are supported while we got {}".format(args.test_crop)
        )

    if not args.use_reference and not args.use_kinetics_reference:
Example #5
                                            feature_mask=feature_mask,
                                            test_mode=True)
            rois = rois[0].cpu().numpy()
            actness = actness[0].cpu().numpy()
            roi_scores = roi_scores[0].cpu().numpy()[:, 1]
            outputs = [rois, actness, roi_scores, num_feat]
            result[video_id] = outputs
    return result


if __name__ == '__main__':

    ctx = multiprocessing.get_context('spawn')
    net = BinaryClassifier(num_class,
                           args.num_body_segments,
                           args,
                           dropout=args.dropout,
                           test_mode=True)

    checkpoint = torch.load(args.weights)

    print("model epoch {} loss: {}".format(checkpoint['epoch'],
                                           checkpoint['best_loss']))
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    db = ANetDB.get_db("1.3")
    val_videos = db.get_subset_videos(args.subset)

    loader = torch.utils.data.DataLoader(BinaryDataSet(
Example #6
def main():
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs['sampling']
    num_class = dataset_configs['num_class']
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    db = ANetDB.get_db("1.3")

    # set the directory for the rgb features
    if args.feat_model in ('i3d_rgb', 'i3d_rgb_trained'):
        args.input_dim = 1024
    elif args.feat_model in ('inception_resnet_v2', 'inception_resnet_v2_trained'):
        args.input_dim = 1536
    if args.use_flow:
        if not args.only_flow:
            args.input_dim += 1024
        else:
            args.input_dim = 1024
    print(("=> the input features are extracted from '{}' and the dim is '{}'"
           ).format(args.feat_model, args.input_dim))
    # if reduce the dimension of input feature first
    if args.reduce_dim > 0:
        assert args.reduce_dim % args.n_head == 0, "reduce_dim {} % n_head {} != 0".format(
            args.reduce_dim, args.n_head)
        args.d_k = int(args.reduce_dim // args.n_head)
        args.d_v = args.d_k
    else:
        assert args.input_dim % args.n_head == 0, "input_dim {} % n_head {} != 0".format(
            args.input_dim, args.n_head)
        args.d_k = int(args.input_dim // args.n_head)
        args.d_v = args.d_k
    args.d_model = args.n_head * args.d_k

    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)
    save_path = os.path.join(
        args.result_path,
        '_'.join((args.att_kernel_type, 'N' + str(args.n_layers))))
    if not args.pos_enc:
        save_path += '_nopos'
    if args.num_local > 0:
        save_path = save_path + '_loc' + str(args.num_local) + args.local_type
        if args.dilated_mask:
            save_path += '_dilated'
    if args.groupwise_heads > 0:
        save_path = save_path + '_G' + str(args.groupwise_heads)
    if len(args.roi_poolsize) > 0:
        save_path = save_path + '_roi' + str(args.roi_poolsize)
    model_name = os.path.split(save_path)[1]
    # logger = Logger('./logs/{}'.format(model_name))
    logger = None

    model = BinaryClassifier(num_class,
                             args.num_body_segments,
                             args,
                             dropout=args.dropout)
    model = torch.nn.DataParallel(model, device_ids=None).cuda()

    cudnn.enabled = False
    # cudnn.benchmark = True
    pin_memory = True
    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    train_videos = db.get_subset_videos('training')
    val_videos = db.get_subset_videos('validation')
    train_loader = torch.utils.data.DataLoader(BinaryDataSet(
        args.feat_root,
        args.feat_model,
        train_prop_file,
        train_videos,
        exclude_empty=True,
        body_seg=args.num_body_segments,
        input_dim=args.d_model,
        prop_per_video=args.prop_per_video,
        fg_ratio=6,
        bg_ratio=6,
        num_local=args.num_local,
        use_flow=args.use_flow,
        only_flow=args.only_flow),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=pin_memory,
                                               drop_last=True)

    # val_loader = torch.utils.data.DataLoader(
    #     BinaryDataSet(args.feat_root, args.feat_model, val_prop_file, val_videos,
    #                   exclude_empty=True, body_seg=args.num_body_segments,
    #                   input_dim=args.d_model, prop_per_video=args.prop_per_video,
    #                   fg_ratio=6, bg_ratio=6, num_local=args.num_local,
    #                   use_flow=args.use_flow, only_flow=args.only_flow),
    #     batch_size=args.batch_size//2, shuffle=False,
    #     num_workers=args.workers*2, pin_memory=pin_memory)
    val_loader = torch.utils.data.DataLoader(BinaryDataSet(
        args.feat_root,
        args.feat_model,
        val_prop_file,
        subset_videos=val_videos,
        exclude_empty=True,
        body_seg=args.num_body_segments,
        input_dim=args.d_model,
        test_mode=True,
        use_flow=args.use_flow,
        verbose=False,
        num_local=args.num_local,
        only_flow=args.only_flow),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=10,
                                             pin_memory=True)

    ground_truth, cls_to_idx = grd_activity(
        'data/activity_net.v1-3.min_save.json', subset='validation')
    del cls_to_idx['background']

    # optimizer = torch.optim.Adam(
    #         model.parameters(),
    #         args.lr, weight_decay=args.weight_decay)

    optimizer = AdamW(model.parameters(),
                      args.lr,
                      weight_decay=args.weight_decay)

    # optimizer = torch.optim.SGD(model.parameters(),
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay, nesterov=False)

    if args.resume is not None and len(args.resume) > 0:
        model.load_state_dict(torch.load(args.resume)['state_dict'],
                              strict=False)
    criterion_stage1 = CE_Criterion_multi(use_weight=True)
    criterion_stage2 = Rank_Criterion(epsilon=0.02)

    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        if patience > 5:
            break
        train(train_loader, model, optimizer, criterion_stage1,
              criterion_stage2, epoch, logger)

        # evaluate on validation list
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, ground_truth,
                            (epoch + 1) * len(train_loader), epoch)

            # remember best prec@1 and save checkpoint
            # count it as an improvement only if loss drops by >0.01% (hysteresis for early stopping)
            is_best = 1.0001 * loss < best_loss
            if is_best:
                patience = 0
            else:
                patience += 1
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                }, is_best, save_path)
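
save_checkpoint is likewise never defined in these examples. A minimal sketch of the common PyTorch checkpointing pattern follows; the file names are assumptions, and the optional save_path default keeps it compatible with the two-argument call in Example #3.

import os
import shutil
import torch

def save_checkpoint(state, is_best, save_path='.', filename='checkpoint.pth.tar'):
    # always persist the latest state; keep a separate copy of the best one so far
    ckpt = os.path.join(save_path, filename)
    torch.save(state, ckpt)
    if is_best:
        shutil.copyfile(ckpt, os.path.join(save_path, 'model_best.pth.tar'))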