Example #1
0
def main():
    """Entry point for the role-question, top-down attention imSitu model.

    Parses the command line, builds the encoder, the model and the
    train/dev/test data loaders, loads whichever pretrained weights the
    selected mode asks for, and then does exactly one of:

    * ``--evaluate``: score the model on the dev split and dump the
      per-role prediction dictionaries to JSON files in the working
      directory;
    * ``--test``: score the model on the test split;
    * otherwise: start training.

    Raises:
        Exception: if a mode that needs pretrained weights was selected but
            the matching ``--verb_module`` / ``--resume_model`` path is empty.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    # --evaluate scores the dev split and --test scores the test split; the
    # help texts used to be identical, which hid that difference.
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only evaluate on the dev set')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only evaluate on the test set')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    # Accepted for command-line compatibility; not read anywhere in this
    # function.
    parser.add_argument('--frcnn_feat_dir',
                        type=str,
                        help='Location of output from detectron')
    # TODO: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters forwarded to the optimizer factory and to train().
    lr = 0.0001
    lr_max = 5e-4
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    def _load_json(path):
        """Read one JSON file, closing the handle as soon as it is parsed."""
        with open(path) as handle:
            return json.load(handle)

    def _score_split(loader, label):
        """Evaluate on ``loader``, print the averaged score, return top-1."""
        top1, top5, _ = eval(model,
                             loader,
                             encoder,
                             args.gpuid,
                             write_to_file=True)

        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()

        # Mean of the eight reported metrics (three top-1, five top-5).
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print('{} average :{:.2f} {} {}'.format(
            label, avg_score * 100,
            utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))
        return top1

    print('model spec :, top down att with role q ')

    train_set = _load_json(dataset_folder + "/train.json")
    imsitu_roleq = _load_json("imsitu_data/imsitu_questions.json")
    encoder = imsitu_encoder(train_set, imsitu_roleq)

    model = model_my_new.BaseModel(encoder, args.gpuid)

    # Parameter groups handed to the optimizers below.
    cnn_features, role_features = utils.group_features_noun(model)

    train_set = imsitu_loader_roleq(imgset_folder, train_set, encoder,
                                    model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=64,
                                               shuffle=True,
                                               num_workers=n_worker)

    dev_set = _load_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader_roleq(imgset_folder, dev_set, encoder,
                                  model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=64,
                                             shuffle=True,
                                             num_workers=n_worker)

    test_set = _load_json(dataset_folder + "/test.json")
    test_set = imsitu_loader_roleq(imgset_folder, test_set, encoder,
                                   model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=n_worker)

    # Second loader over the dev annotations with a smaller batch size; it is
    # passed to train() alongside dev_loader.
    traindev_set = _load_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader_roleq(imgset_folder, traindev_set, encoder,
                                       model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            # The message used to blame the verb module, which is not the
            # option this branch reads.
            raise Exception('[resume_model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    # NOTE(review): this optimizer is replaced by the Adamax instance created
    # further down. The call is kept because the helper may do more than
    # build an optimizer (e.g. re-enable gradients) -- confirm before removing.
    optimizer = utils.get_optimizer_noun(lr, weight_decay, optimizer_select,
                                         cnn_features, role_features)

    # makedirs also handles nested output paths and an already-existing dir.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    # The CNN group gets its own, smaller learning rate; the role group uses
    # the optimizer-wide default.
    optimizer = torch.optim.Adamax([{
        'params': cnn_features,
        'lr': 5e-5
    }, {
        'params': role_features
    }],
                                   lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1 = _score_split(dev_loader, 'Dev')

        # Dump the dictionaries collected by the top-1 scorer during
        # evaluation.
        dumps = (
            ('role_pred_data.json', top1.role_dict),
            ('fail_val_all.json', top1.value_all_dict),
            ('pass_val_all.json', top1.vall_all_correct),
        )
        for file_name, payload in dumps:
            with open(file_name, 'w') as fp:
                json.dump(payload, fp, indent=4)

        print('Writing predictions to file completed !')

    elif args.test:
        _score_split(test_loader, 'Test')

    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer,
              scheduler, n_epoch, args.output_dir, encoder, args.gpuid,
              clip_norm, lr_max, model_name, args)
def main():
    """Entry point for training the relation-network imSitu VSRL model.

    Parses the command line, builds the encoder, the model and the data
    loaders, loads whichever pretrained weights the selected mode asks for,
    creates the optimizer and a step learning-rate scheduler, and starts
    training.

    Raises:
        Exception: if a mode that needs pretrained weights was selected but
            the matching ``--verb_module`` / ``--resume_model`` path is empty.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    # TODO: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters forwarded to the optimizer factory, the scheduler and
    # train().
    lr = 1e-5
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    def _load_json(path):
        """Read one JSON file, closing the handle as soon as it is parsed."""
        with open(path) as handle:
            return json.load(handle)

    print(
        'model spec :, 256 hidden, 25 epoch decay, rn_att, 3 layers e-5 init lr decay'
    )

    train_set = _load_json(dataset_folder + "/train.json")
    encoder = imsitu_encoder(train_set)

    model = model_vsrl_rnatt.RelationNetworks(encoder, args.gpuid)

    # Parameter groups handed to the optimizer factory below.
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=24,
                                               shuffle=True,
                                               num_workers=n_worker)

    # NOTE(review): both dev loaders use the *training* preprocessing. That
    # may be intentional for this model or a copy-paste slip -- confirm
    # whether the model exposes a separate dev transform.
    dev_set = _load_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=24,
                                             shuffle=True,
                                             num_workers=n_worker)

    # Second loader over the dev annotations with a smaller batch size; it is
    # passed to train() alongside dev_loader.
    traindev_set = _load_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        if not args.verb_module:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if not args.resume_model:
            # The message used to blame the verb module, which is not the
            # option this branch reads.
            raise Exception('[resume_model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features, role_features)

    # makedirs also handles nested output paths and an already-existing dir.
    os.makedirs(args.output_dir, exist_ok=True)

    if args.gpuid >= 0:
        model.cuda()

    # Multiply the learning rate by lr_gamma every lr_step scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, args.output_dir, encoder, args.gpuid, clip_norm,
          lr_max, model_name, args)
Example #3
0
def main(args=None):
    """Fine-tune selected heads of a pretrained RetinaNet-based model.

    Loads pretrained weights, freezes the whole network, re-enables a fixed
    list of sub-modules, and then alternates one training epoch with one
    evaluation pass, decaying the learning rate after every epoch.

    Args:
        args: optional list of command-line tokens; ``None`` makes argparse
            read ``sys.argv``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    arg_parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    # The help text used to be a copy of --train-file's.
    arg_parser.add_argument(
        '--pretrained_model',
        help='Path to the pretrained model weights to load')
    arg_parser.add_argument(
        '--classes-file',
        help='Path to file containing class list (see readme)')
    arg_parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    arg_parser.add_argument('--role-file', help='Path to role file')
    arg_parser.add_argument('--epochs',
                            help='Number of epochs',
                            type=int,
                            default=50)
    arg_parser.add_argument('--title', type=str, default='')
    arg_parser.add_argument("--resume-epoch", type=int, default=0)
    arg_parser.add_argument("--detach-epoch", type=int, default=12)
    arg_parser.add_argument("--gt-noun-epoch", type=int, default=5)
    arg_parser.add_argument("--hidden-size", type=int, default=1024)
    arg_parser.add_argument("--lr-decrease", type=int, default=10)
    arg_parser.add_argument("--second-lr-decrease", type=int, default=20)
    arg_parser.add_argument("--iteration", type=float, default=100.0)
    arg_parser.add_argument("--lr", type=float, default=.0006)
    arg_parser.add_argument("--batch-size", type=int, default=16)
    # Parsed options; kept under a separate name from the parser so the two
    # are not confused.
    opts = arg_parser.parse_args(args)

    # Only the writer is used here; the log directory is not needed.
    writer, _ = init_log_dir(opts)

    print('correct version')

    print("loading dev")
    with open('./SWiG_jsons/dev.json') as f:
        dev_gt = json.load(f)
    print("loading imsitu_dpace")
    with open('./SWiG_jsons/imsitu_space.json') as f:
        # Renamed from `all` so the builtin is no longer shadowed.
        imsitu_space = json.load(f)
    verb_orders = imsitu_space['verbs']
    noun_dict = imsitu_space['nouns']

    dataloader_train, dataset_train, dataloader_val, dataset_val = init_data(
        opts, verb_orders)
    print("loading model")
    retinanet = model_new.resnet50(num_classes=dataset_train.num_classes(),
                                   num_nouns=dataset_train.num_nouns(),
                                   parser=opts,
                                   pretrained=True)

    utils.load_net(opts.pretrained_model, [retinanet])

    print('Loading pretrained RetinaNet finished!')

    # Freeze everything, then re-enable exactly the modules that are
    # optimised. One list drives both steps so they cannot drift apart.
    utils.set_trainable(retinanet, False)
    trainable_modules = [
        retinanet.vocab_linear,
        retinanet.vocab_linear_2,
        retinanet.verb_embeding,
        retinanet.noun_embedding,
        retinanet.regressionModel,
        retinanet.classificationModel,
        retinanet.rnn,
        retinanet.rnn_linear,
    ]
    for module in trainable_modules:
        utils.set_trainable(module, True)

    # One parameter group per module, all sharing the same learning rate.
    optimizer = torch.optim.Adamax(
        [{'params': module.parameters()} for module in trainable_modules],
        lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    retinanet = retinanet.cuda()

    print('weights loaded')

    best_eval = 0

    for epoch_num in range(opts.resume_epoch, opts.epochs):
        train(retinanet, optimizer, dataloader_train, opts, epoch_num,
              writer)
        print('Evaluating dataset')
        eval_avg = evaluate(retinanet, dataloader_val, opts, dataset_val,
                            dataset_train, verb_orders, dev_gt, epoch_num,
                            writer, noun_dict)

        if eval_avg > best_eval:
            # Remember the best score; without this every epoch with a
            # positive score was announced as a new best.
            best_eval = eval_avg
            # NOTE(review): no checkpoint is written here -- confirm whether
            # the best model should be saved.
            print('New best model at epoch ', epoch_num)

        scheduler.step()
def main():
    """Evaluate pretrained verb and role modules together on the dev split.

    Parses the command line, builds the encoder, the combined model and the
    data loaders, loads the two pretrained modules, runs evaluation on the
    dev loader and prints the average of the three top-1 metrics.

    Raises:
        Exception: if ``--verb_module`` or ``--role_module`` is empty.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    # Several of the options below are accepted for command-line
    # compatibility but are not read anywhere in this function.
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--role_module',
                        type=str,
                        default='',
                        help='pretrained role module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir',
                        type=str,
                        help='Location of output from detectron')
    # TODO: train role module separately with gt verbs

    args = parser.parse_args()

    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    def _load_json(path):
        """Read one JSON file, closing the handle as soon as it is parsed."""
        with open(path) as handle:
            return json.load(handle)

    print('model spec :, verb role with context ')

    train_set = _load_json(dataset_folder + "/train.json")
    imsitu_roleq = _load_json("imsitu_data/imsitu_questions_prev.json")
    encoder = imsitu_encoder(train_set, imsitu_roleq)

    model = model_verbmlp_roletd_new.BaseModel(encoder, args.gpuid)

    # NOTE(review): only dev_loader is consumed below. The other datasets
    # and loaders are still built so start-up behaviour (including which
    # annotation files must exist) is unchanged -- confirm they can go.
    train_set = imsitu_loader_roleq(imgset_folder, train_set, encoder,
                                    model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=64,
                                               shuffle=True,
                                               num_workers=n_worker)

    dev_set = _load_json(dataset_folder + "/dev.json")
    dev_set = imsitu_loader_roleq(imgset_folder, dev_set, encoder,
                                  model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=64,
                                             shuffle=True,
                                             num_workers=n_worker)

    test_set = _load_json(dataset_folder + "/test.json")
    test_set = imsitu_loader_roleq(imgset_folder, test_set, encoder,
                                   model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=n_worker)

    traindev_set = _load_json(dataset_folder + "/dev.json")
    traindev_set = imsitu_loader_roleq(imgset_folder, traindev_set, encoder,
                                       model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    utils.set_trainable(model, False)

    # Both checkpoints are loaded unconditionally, so fail with a clear
    # message instead of handing an empty path to the loader.
    if not args.verb_module:
        raise Exception('[pretrained verb module] not specified')
    if not args.role_module:
        raise Exception('[pretrained role module] not specified')
    utils.load_net(args.verb_module, [model.verb])
    utils.load_net(args.role_module, [model.roles])

    # makedirs also handles nested output paths and an already-existing dir.
    os.makedirs(args.output_dir, exist_ok=True)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    # Only the top-1 scorer is reported; the other two results are unused.
    top1, _, _ = eval(model,
                      dev_loader,
                      encoder,
                      args.gpuid,
                      write_to_file=True)
    top1_avg = top1.get_average_results()

    avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"]
    avg_score /= 3

    print('Dev average :{:.2f} {} '.format(
        avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-')))
Example #5
0
def main(args=None):
    """Train a RetinaNet-based model and keep the best checkpoint.

    Builds the model with every parameter trainable, wraps it in
    ``DataParallel`` on GPU, then alternates one training epoch with one
    evaluation pass. Whenever the evaluation score improves, the model and
    optimizer state are written to
    ``<output_dir>/<model_saving_name>.pth``.

    Args:
        args: optional list of command-line tokens; ``None`` makes argparse
            read ``sys.argv``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    arg_parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    arg_parser.add_argument(
        '--classes-file',
        help='Path to file containing class list (see readme)')
    arg_parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    arg_parser.add_argument('--role-file', help='Path to role file')
    arg_parser.add_argument('--epochs',
                            help='Number of epochs',
                            type=int,
                            default=50)
    arg_parser.add_argument('--title', type=str, default='')
    arg_parser.add_argument("--resume-epoch", type=int, default=0)
    arg_parser.add_argument("--detach-epoch", type=int, default=12)
    arg_parser.add_argument("--gt-noun-epoch", type=int, default=5)
    arg_parser.add_argument("--hidden-size", type=int, default=1024)
    arg_parser.add_argument("--lr-decrease", type=int, default=10)
    arg_parser.add_argument("--second-lr-decrease", type=int, default=20)
    arg_parser.add_argument("--iteration", type=float, default=100.0)
    arg_parser.add_argument("--lr", type=float, default=.0006)
    arg_parser.add_argument("--batch-size", type=int, default=16)
    arg_parser.add_argument('--output_dir',
                            type=str,
                            default='./trained_models',
                            help='Location to output the model')
    # NOTE(review): there is no default, so omitting this option saves the
    # checkpoint as "None.pth" -- confirm whether it should be required.
    arg_parser.add_argument('--model_saving_name',
                            type=str,
                            help='saving name of the outpul model')
    # Parsed options; kept under a separate name from the parser so the two
    # are not confused.
    opts = arg_parser.parse_args(args)

    # Only the writer is used here; the log directory is not needed.
    writer, _ = init_log_dir(opts)

    # makedirs also handles nested output paths and an already-existing dir.
    os.makedirs(opts.output_dir, exist_ok=True)

    print("loading dev")
    with open('./SWiG_jsons/dev.json') as f:
        dev_gt = json.load(f)
    print("loading imsitu_dpace")
    with open('./SWiG_jsons/imsitu_space.json') as f:
        # Renamed from `all` so the builtin is no longer shadowed.
        imsitu_space = json.load(f)
    verb_orders = imsitu_space['verbs']
    noun_dict = imsitu_space['nouns']

    dataloader_train, dataset_train, dataloader_val, dataset_val = init_data(
        opts, verb_orders)
    retinanet = model_setup7.resnet50(num_classes=dataset_train.num_classes(),
                                      num_nouns=dataset_train.num_nouns(),
                                      parser=opts,
                                      pretrained=True)

    utils.set_trainable(retinanet, True)

    # Sub-modules whose parameters are optimised, one parameter group each,
    # all sharing the same learning rate.
    optimised_modules = [
        retinanet.conv1,
        retinanet.bn1,
        retinanet.layer1,
        retinanet.layer2,
        retinanet.layer3,
        retinanet.layer4,
        retinanet.verb_embeding,
        retinanet.vrole_combo_embedding,
        retinanet.query_composer,
        retinanet.v_att,
        retinanet.q_net,
        retinanet.v_net,
        retinanet.gnn,
        retinanet.fpn,
        retinanet.regressionModel,
        retinanet.classificationModel,
        retinanet.gnn_linear,
        retinanet.noun_classifier_roi,
    ]
    optimizer = torch.optim.Adamax(
        [{'params': module.parameters()} for module in optimised_modules],
        lr=5e-4)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    best_eval = 0

    for epoch_num in range(opts.resume_epoch, opts.epochs):
        train(retinanet, optimizer, dataloader_train, opts, epoch_num,
              writer)
        print('Evaluating dataset')
        eval_avg = evaluate(retinanet, dataloader_val, opts, dataset_val,
                            dataset_train, verb_orders, dev_gt, epoch_num,
                            writer, noun_dict)

        if eval_avg > best_eval:
            best_eval = eval_avg
            # Save the wrapped module's weights (not the DataParallel
            # wrapper's) together with the optimizer state.
            torch.save(
                {
                    'state_dict': retinanet.module.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                opts.output_dir + "/{}.pth".format(opts.model_saving_name))
            print('New best model at epoch ', epoch_num)

        scheduler.step()
Example #6
0
def main():
    """Command-line entry point: train or evaluate the imSitu VSRL model.

    Parses the command-line flags, builds the encoder, the model and the
    train/dev data loaders, selects a training regime (which checkpoint is
    loaded into which sub-modules and which ``optimizer_select`` value is
    passed to ``utils.get_optimizer``), and then either evaluates on the dev
    loader (printing scores and writing three CSV files to the current
    directory) or starts training.

    Raises:
        Exception: if the selected regime needs a checkpoint but the
            matching path flag (``--verb_module`` or ``--resume_model``)
            is empty.
    """

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only use the testing mode')
    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters (hard-coded rather than exposed as flags).
    # NOTE(review): batch_size is not used below -- every DataLoader in this
    # function hard-codes its own batch size.
    batch_size = 640
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    # Use context managers so the annotation files are closed promptly
    # instead of relying on garbage collection.
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_vsrl_small_finetune.RelationNetworks(encoder, args.gpuid)

    # To group up the features
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=32,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=32,
                                             shuffle=True,
                                             num_workers=n_worker)

    # NOTE(review): this reads the same dev.json as dev_set above and differs
    # only in batch size -- confirm that a dev-based "traindev" is intended.
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    # Start from a fully frozen model; the regime chosen below determines the
    # optimizer variant via optimizer_select.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            # The missing option here is --resume_model, not the verb module.
            raise Exception('[resume model] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        if not args.evaluate:
            print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features, role_features)

    # makedirs(exist_ok=True) handles nested paths and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(args.output_dir, exist_ok=True)

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()

    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_gamma)
    #gradient clipping, grad check

    if args.evaluate:
        # `eval` is called with the model and loader, so it is expected to be
        # a file-level evaluation function that shadows the builtin.
        top1, top5, val_loss = eval(model,
                                    dev_loader,
                                    encoder,
                                    args.gpuid,
                                    write_to_file=True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        # NOTE(review): six terms are summed but the divisor is 8 -- confirm
        # whether two terms are missing or the divisor should be 6.
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"]
        avg_score /= 8

        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100, utils.format_dict(top1_avg, '{:.2f}', '1-'),
            utils.format_dict(top5_avg, '{:.2f}', '5-')))

        #write results to csv file
        gt_labels = top1.gt_situation
        pred_labels = top1.predicted_situation
        verb_pred = top1.verb_pred

        # newline="" is what the csv module documents for files handed to
        # csv.writer; without it extra blank rows appear on some platforms.
        with open("gt_rn_only.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(gt_labels)

        with open("pred_rn_only.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(pred_labels)

        with open("verbpred_rn_only.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(['verb', 'total', 'predicted'])
            for key, value in verb_pred.items():
                writer.writerow([key, value[0], value[1]])

        print('Writing predictions to file completed !')

    else:

        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer,
              scheduler, n_epoch, args.output_dir, encoder, args.gpuid,
              clip_norm, lr_max, model_name, args)
    ### STOP EDITING HERE .

    # NOTE(review): everything below reads options (args.model, args.dataset,
    # args.optimizer, args.continue_training, ...) that the parser above never
    # defines, and names (TRAIN_DIR, OUT_DIR, base_model, FC_LAYERS,
    # load_model) that are not set in this function. It looks like it was
    # pasted from a different script -- confirm before relying on it.

    # Save the list of classes for prediction mode later
    class_list = utils.get_subfolders(TRAIN_DIR)
    utils.save_class_list(
        OUT_DIR,
        class_list,
        model_name=args.model,
        dataset_name=os.path.basename(args.dataset),
    )

    # NOTE(review): eval() on a command-line string executes arbitrary code if
    # this resolves to the builtin -- confirm and prefer an explicit lookup.
    optim = eval(args.optimizer)(lr=args.lr)
    if args.continue_training is not None:
        finetune_model = load_model(args.continue_training)
        if args.transfer_strategy == "finetune":
            utils.set_trainable(finetune_model, True)
    else:
        finetune_model = utils.build_finetune_model(
            base_model,
            dropout=args.dropout,
            fc_layers=FC_LAYERS,
            num_classes=len(class_list),
            as_fixed_feature_extractor=True
            if args.transfer_strategy == "fixed"
            else False,
            skip_interval=args.skip_interval,
        )

    finetune_model.compile(optim, loss="categorical_crossentropy", metrics=["accuracy"])
    if args.summarize_model:
        finetune_model.summary()
def set_learnable(net):
    """Freeze ``net`` except three sub-networks, then build and train a model.

    First marks all of ``net`` as non-trainable and re-enables ``img_net``,
    ``imp_net`` and ``mask_net``. Then loads the dictionary and the
    train/val datasets, builds a baseline model and the model named by
    ``args.model``, loads pretrained weights into its ``tda_model`` (which is
    then frozen), wraps the model in ``nn.DataParallel`` and calls ``train``.

    ``args`` is not a parameter; it is read from the enclosing scope.

    NOTE(review): the dataset/model section never uses ``net`` -- it looks
    like it was pasted from a different function; confirm it belongs here.
    """
    # Freeze everything, then re-enable the selected sub-modules one by one.
    utils.set_trainable(net, False)
    for submodule_name in ('img_net', 'imp_net', 'mask_net'):
        utils.set_trainable(getattr(net, submodule_name), True)

    vocab = Dictionary.load_from_file('data/dictionary.pkl')
    train_data = VQAFeatureDataset_withmask('train', vocab)
    val_data = VQAFeatureDataset_withmask('val', vocab)
    per_batch = args.batch_size

    # Baseline network that is handed to the main model's constructor.
    baseline_ctor = 'build_baseline0_newatt'
    baseline_net = getattr(base_model, baseline_ctor)(train_data,
                                                      args.num_hid)

    # The main model's constructor is looked up by name from args.model.
    model_ctor = 'build_%s' % args.model
    build_model = getattr(pretrained_tda_caq_model, model_ctor)
    caq_model = build_model(train_data, args.num_hid, baseline_net).cuda()
    caq_model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Load pretrained weights into the tda sub-model and keep it frozen.
    utils.load_net(args.pretrained_tda_model, [caq_model.tda_model],
                   ['module'])
    utils.set_trainable(caq_model.tda_model, False)

    caq_model = nn.DataParallel(caq_model).cuda()

    #seventyfive = list(range(0, int(math.ceil(len(train_dset) * 0.75))))
    #trainset_1 = torch.utils.data.Subset(train_dset, seventyfive)

    # Both loaders share the same shuffling and worker settings.
    loader_opts = {'shuffle': True, 'num_workers': 1}
    train_loader = DataLoader(train_data, per_batch, **loader_opts)
    eval_loader = DataLoader(val_data, per_batch, **loader_opts)

    train(caq_model, train_loader, eval_loader, args.epochs, args.output)
Example #10
0
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--test',
                        action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder',
                        type=str,
                        default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir',
                        type=str,
                        default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir',
                        type=str,
                        help='Location of output from detectron')
    parser.add_argument('--batch_size', type=int, default=64)
    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    batch_size = args.batch_size
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    #dataset_folder = 'imSitu'
    #imgset_folder = 'resized_256'
    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    train_set = json.load(open(dataset_folder + "/updated_train_new.json"))

    model = model_resnet_imgfeat_extractor.BaseModel()

    # To group up the features

    train_set = imsitu_loader_resnet_featextract(imgset_folder, train_set,
                                                 model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=n_worker)

    dev_set = json.load(open(dataset_folder + "/dev.json"))
    dev_set = imsitu_loader_resnet_featextract(imgset_folder, dev_set,
                                               model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=n_worker)

    test_set = json.load(open(dataset_folder + "/test.json"))
    test_set = imsitu_loader_resnet_featextract(imgset_folder, test_set,
                                                model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=n_worker)

    utils.set_trainable(model, False)

    if args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])

    if args.gpuid >= 0:
        model.cuda()
    extract_features(model, 'train', train_loader, args.gpuid,
                     len(train_loader) * batch_size)
    extract_features(model, 'val', dev_loader, args.gpuid,
                     len(dev_loader) * batch_size)
    extract_features(model, 'test', test_loader, args.gpuid,
                     len(test_loader) * batch_size)
    '''print('rechecking')