Ejemplo n.º 1
0
def eval_seq(
    opt,
    save_dir='/Users/ecom-v.ramesh/Documents/Personal/2020/DL/Trackjectory/output'
):
    """Track a single object through a frame sequence and dump annotated frames.

    Builds a ``SiamRPNPP`` network, loads the weights named by
    ``opt.single_track_load_model``, initialises the tracker on the first
    frame with a hard-coded corner box, then tracks every frame, draws the
    predicted rectangle, writes the frame as ``<index>.png`` and finally
    prints the measured tracking speed.

    Args:
        opt: Options object; only ``opt.single_track_load_model`` is read.
        save_dir: Not used by the body -- frames are written to the
            hard-coded ``output2/siam/`` directory instead.
            NOTE(review): confirm whether output should honour this argument.

    Raises:
        IndexError: If the frame glob pattern matches no files.
    """
    import os  # local import: only needed to prepare the output directory

    frames_pattern = '/Users/ecom-v.ramesh/Desktop/kabadi/frames/frames2/*.png'
    out_dir = '/Users/ecom-v.ramesh/Documents/Personal/2020/DL/Trackjectory/output2/siam/'

    # load net
    net = SiamRPNPP()
    load_net(net, opt.single_track_load_model)
    net.eval()  # inference mode; the model is left on its current device

    # image and init box
    image_files = sorted(glob.glob(frames_pattern))
    if not image_files:
        # Same exception type the bare image_files[0] lookup would raise,
        # but with a message that points at the actual cause.
        raise IndexError('no frames matched {}'.format(frames_pattern))
    init_rbox = [695, 250, 885, 250, 695, 570, 885, 570]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = cv2.imread(image_files[0])  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)

    # cv2.imwrite returns False instead of raising when the directory is
    # missing, which would silently drop every annotated frame.
    os.makedirs(out_dir, exist_ok=True)

    # tracking and visualization
    toc = 0  # accumulated tick count spent inside SiamRPN_track only
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(coord) for coord in res]
        top_left = (res[0], res[1])
        bottom_right = (res[0] + res[2], res[1] + res[3])
        cv2.rectangle(im, top_left, bottom_right, (0, 255, 255), 3)
        cv2.imwrite(out_dir + str(f) + '.png', im)

    print('Tracking Speed {:.1f}fps'.format(
        (len(image_files) - 1) / (toc / cv2.getTickFrequency())))
Ejemplo n.º 2
0
def main():
    """Evaluate the joint VGG-verb / top-down-verb / CAQ model on imSitu.

    Parses command-line options, builds the encoder from the training
    annotations, assembles the sub-models into the joint model selected by
    ``--model``, loads the three pretrained checkpoints and then, depending
    on ``--evaluate`` or ``--test``, runs ``eval`` on the dev or test loader
    and prints the averaged top-1/top-5 scores. With neither flag set,
    nothing is evaluated.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--evaluate_rare',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--train_file',
                        default="train_freq2000.json",
                        type=str,
                        help='trainfile name')
    parser.add_argument('--dev_file',
                        default="dev_freq2000.json",
                        type=str,
                        help='dev file name')
    parser.add_argument('--test_file',
                        default="test_freq2000.json",
                        type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name',
                        type=str,
                        help='saving name of the outpul model')

    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--model', type=str, default='vgg_caq_joint')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--clip_norm', type=float, default=0.25)
    parser.add_argument('--num_workers', type=int, default=3)

    parser.add_argument('--vgg_verb_model',
                        type=str,
                        default='',
                        help='Pretrained vgg verb model')
    parser.add_argument('--tda_verb_model',
                        type=str,
                        default='',
                        help='Pretrained topdown verb model')
    parser.add_argument('--caq_model',
                        type=str,
                        default='',
                        help='Pretrained CAQ model')

    args = parser.parse_args()

    batch_size = args.batch_size
    n_worker = args.num_workers

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    def load_annotations(file_name):
        # Context manager so the annotation file handle is closed promptly
        # instead of being left to the garbage collector.
        with open(dataset_folder + '/' + file_name) as handle:
            return json.load(handle)

    def make_loader(dataset):
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_worker)

    train_set = load_annotations(args.train_file)

    # The encoder is built from the raw training annotations and shared by
    # every split and sub-model below.
    encoder = imsitu_encoder.imsitu_encoder(train_set)

    train_set = imsitu_loader.imsitu_loader(imgset_folder, train_set, encoder,
                                            'train', encoder.train_transform)

    vgg_verb_model = vgg_verb_classifier.build_vgg_verb_classifier(
        len(encoder.verb_list))

    role_module = top_down_baseline_addemb.build_top_down_baseline(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder)

    tda_verb_model = top_down_verb.build_top_down_verb(
        encoder.get_num_labels(), encoder.get_num_verbs(), role_module)

    tda_role_module = top_down_baseline.build_top_down_baseline(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder)

    caq_model = top_down_query_context.build_top_down_query_context(
        encoder.get_num_roles(), encoder.get_num_verbs(),
        encoder.get_num_labels(), encoder, tda_role_module)

    # Only the joint model's builder is chosen at run time (via --model).
    constructor = 'build_%s' % args.model
    model = getattr(revgg_caq_joint_eval,
                    constructor)(vgg_verb_model, tda_verb_model, caq_model)

    # NOTE(review): the training loader is built but never consumed in this
    # evaluation-only entry point; kept to preserve construction order.
    train_loader = make_loader(train_set)

    dev_set = load_annotations(args.dev_file)
    dev_set = imsitu_loader.imsitu_loader(imgset_folder, dev_set, encoder,
                                          'val', encoder.dev_transform)
    dev_loader = make_loader(dev_set)

    test_set = load_annotations(args.test_file)
    test_set = imsitu_loader.imsitu_loader(imgset_folder, test_set, encoder,
                                           'test', encoder.dev_transform)
    test_loader = make_loader(test_set)

    # makedirs(exist_ok=True) handles nested paths and avoids the
    # exists()/mkdir() race of the two-step check.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(args.seed)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    # load the pretrained weights of each sub-model
    utils.load_net(args.vgg_verb_model, [model.vgg_model])
    print('successfully loaded vgg_verb_model!')
    utils.load_net(args.tda_verb_model, [model.tda_model])
    print('successfully loaded tda_verb_model!')
    utils.load_net(args.caq_model, [model.caq_model])
    print('successfully loaded caq_model!')

    # --evaluate takes precedence over --test, as in an if/elif chain.
    if args.evaluate:
        split_label, loader = 'Dev', dev_loader
    elif args.test:
        split_label, loader = 'Test', test_loader
    else:
        return

    top1, top5, _ = eval(model,
                         loader,
                         encoder,
                         args.gpuid,
                         write_to_file=True)

    top1_avg = top1.get_average_results()
    top5_avg = top5.get_average_results()

    # Eight metrics, summed left to right in the original order.
    scores = [top1_avg[key] for key in ("verb", "value", "value-all")]
    scores += [
        top5_avg[key]
        for key in ("verb", "value", "value-all", "value*", "value-all*")
    ]
    avg_score = sum(scores) / len(scores)

    print('{} average :{:.2f} {} {}'.format(
        split_label, avg_score * 100,
        utils.format_dict(top1_avg, '{:.2f}', '1-'),
        utils.format_dict(top5_avg, '{:.2f}', '5-')))
Ejemplo n.º 3
0
def main():
    """Train or evaluate a top-down baseline model on imSitu.

    Parses command-line options, builds the encoder from the training
    annotations, constructs the model named by ``--model`` from
    ``top_down_baseline`` and sets up an Adamax optimizer with exponential
    learning-rate decay. With ``--evaluate`` or ``--test`` it runs ``eval``
    on the dev or test loader and prints the averaged scores; otherwise it
    starts training via ``train``.

    Raises:
        Exception: If ``--resume_training`` is given without
            ``--resume_model``.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--evaluate_visualize',
                        action='store_true',
                        help='Only use the testing mode to visualize ')
    parser.add_argument('--evaluate_rare',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--train_file',
                        default="train_freq2000.json",
                        type=str,
                        help='trainfile name')
    parser.add_argument('--dev_file',
                        default="dev_freq2000.json",
                        type=str,
                        help='dev file name')
    parser.add_argument('--test_file',
                        default="test_freq2000.json",
                        type=str,
                        help='test file name')
    parser.add_argument('--model_saving_name',
                        type=str,
                        help='saving name of the outpul model')

    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--model', type=str, default='top_down_baseline')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--clip_norm', type=float, default=0.25)
    parser.add_argument('--num_workers', type=int, default=3)

    args = parser.parse_args()

    n_epoch = args.epochs
    batch_size = args.batch_size
    clip_norm = args.clip_norm
    n_worker = args.num_workers

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    def load_annotations(file_name):
        # Context manager so the annotation file handle is closed promptly
        # instead of being left to the garbage collector.
        with open(dataset_folder + '/' + file_name) as handle:
            return json.load(handle)

    def make_loader(dataset):
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_worker)

    train_set = load_annotations(args.train_file)

    # The encoder is built from the raw training annotations and shared by
    # every split and by the model constructor.
    encoder = imsitu_encoder.imsitu_encoder(train_set)

    train_set = imsitu_loader.imsitu_loader(imgset_folder, train_set, encoder,
                                            'train', encoder.train_transform)

    constructor = 'build_%s' % args.model
    model = getattr(top_down_baseline, constructor)(encoder.get_num_roles(),
                                                    encoder.get_num_verbs(),
                                                    encoder.get_num_labels(),
                                                    encoder)

    train_loader = make_loader(train_set)

    dev_set = load_annotations(args.dev_file)
    dev_set = imsitu_loader.imsitu_loader(imgset_folder, dev_set, encoder,
                                          'val', encoder.dev_transform)
    dev_loader = make_loader(dev_set)

    test_set = load_annotations(args.test_file)
    test_set = imsitu_loader.imsitu_loader(imgset_folder, test_set, encoder,
                                           'test', encoder.dev_transform)
    test_loader = make_loader(test_set)

    # makedirs(exist_ok=True) handles nested paths and avoids the
    # exists()/mkdir() race of the two-step check.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(args.seed)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    if args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            raise Exception('[pretrained module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer = torch.optim.Adamax(model.parameters(), lr=1e-3)
        model_name = 'resume_all'

    else:
        print('Training from the scratch.')
        model_name = 'train_full'
        utils.set_trainable(model, True)
        # The convnet gets its own, smaller learning rate; every other
        # module falls back to the optimizer-wide default of 1e-3.
        param_groups = [{'params': model.convnet.parameters(), 'lr': 5e-5}]
        param_groups += [{
            'params': module.parameters()
        } for module in (model.role_emb, model.verb_emb, model.query_composer,
                         model.v_att, model.q_net, model.v_net,
                         model.classifier)]
        optimizer = torch.optim.Adamax(param_groups, lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate or args.test:
        # --evaluate takes precedence over --test, as in an if/elif chain.
        if args.evaluate:
            split_label, loader = 'Dev', dev_loader
        else:
            split_label, loader = 'Test', test_loader

        top1, top5, _ = eval(model, loader, encoder, args.gpuid)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        # Eight metrics, summed left to right in the original order.
        scores = [top1_avg[key] for key in ("verb", "value", "value-all")]
        scores += [
            top5_avg[key]
            for key in ("verb", "value", "value-all", "value*", "value-all*")
        ]
        avg_score = sum(scores) / len(scores)

        print('{} average :{:.2f} {} {}'.format(
            split_label, avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))

    else:

        print('Model training started!')
        train(
            model,
            train_loader,
            dev_loader,
            optimizer,
            scheduler,
            n_epoch,
            args.output_dir,
            encoder,
            args.gpuid,
            clip_norm,
            model_name,
            args.model_saving_name,
        )
Ejemplo n.º 4
0
    optimizer = torch.optim.Adamax(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=args.lr)

    torch.backends.cudnn.benchmark = True

    #if resuming, load it on cpu or GPUs
    if len(args.resume_model) > 1:
        print('Resume training from: {}'.format(args.resume_model))
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        path_to_model = pjoin(args.saving_folder, args.resume_model)
        checkpoint = torch.load(path_to_model, map_location=device)
        utils.load_net(path_to_model, [model])

        if torch.cuda.is_available():
            for parameter in model.module.convnet_verbs.parameters():
                parameter.requires_grad = False
            model.module.convnet_verbs.model.fc.requires_grad = True

            for parameter in model.module.convnet_nouns.parameters():
                parameter.requires_grad = False
            model.module.convnet_nouns.model.fc.requires_grad = True
        else:
            for parameter in model.convnet_verbs.parameters():
                parameter.requires_grad = False
            model.convnet_verbs.model.fc.requires_grad = True

            for parameter in model.convnet_nouns.parameters():