Code example #1
0
def main():
    """imSitu VSRL training entry point (small verb RelationNetworks model).

    Parses command-line arguments, builds the train/dev/traindev data
    loaders, optionally resumes from a saved checkpoint, and launches
    training with Adam + StepLR.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument("--weights_file", help="the model to start from")

    args = parser.parse_args()

    # Hyper-parameters.
    # NOTE(review): batch_size is never used — the DataLoaders below
    # hard-code batch sizes of 64 (train/dev) and 8 (traindev).
    batch_size = 640
    #lr = 1e-5
    lr = 0.00001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    print('LR scheme : lr decay, vgg, fc as per gnn paper batch 64', 1e-5, 0.1,25)

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_verb_small.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    # traindev intentionally re-reads dev.json (small eval-on-train proxy set).
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    if args.command == "resume":
        print ("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    '''optimizer = utils.CosineAnnealingWR(0.01,1200000 , 50,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))'''

    #gradient clipping, grad check

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler, n_epoch, 'trained_models', encoder, args.gpuid, clip_norm, lr_max)
Code example #2
0
def main():
    """imSitu VSRL training entry point (linear-marginal CNN baseline).

    Builds train/dev loaders and runs a small grid search over learning
    rate and weight decay, training for 200 epochs per setting.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)

    args = parser.parse_args()

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = cnn_linear_marginal.baseline(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=64,
                                               shuffle=True,
                                               num_workers=0)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=64,
                                             shuffle=True,
                                             num_workers=4)

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()
    lr_set = [0.001]
    decay_set = [5e-4]

    # NOTE(review): the same model instance is reused across grid-search
    # settings — weights are NOT reset between (lr, decay) combinations,
    # so later runs continue from earlier ones. Confirm this is intended.
    for lr in lr_set:
        for decay in decay_set:
            #lr, weight decay user param
            print('CURRENT PARAM SET : lr, decay :', lr, decay)
            # Only optimize parameters that require gradients (frozen
            # layers are excluded from the optimizer).
            optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                                model.parameters()),
                                         lr=lr,
                                         weight_decay=decay)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                        step_size=10,
                                                        gamma=0.1)
            #gradient clipping, grad check

            print('Model training started!')
            train(model, train_loader, dev_loader, optimizer, scheduler, 200,
                  'trained_models', encoder, args.gpuid)
Code example #3
0
File: main.py  Project: thilinicooray/VSRL
def main():
    """imSitu VSRL training entry point (baseline model).

    Builds train/dev loaders from `imsitu_data` annotations and trains
    the baseline model for 200 epochs with plain Adam (no scheduler).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)

    args = parser.parse_args()

    dataset_folder = 'imsitu_data'
    imgset_folder = 'of500_images_resized'

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)
    model = baseline_model.baseline(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())

    # NOTE(review): train batch_size is 1 while dev uses 64 — confirm
    # this is intentional and not a leftover debug setting.
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=3)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=64,
                                             shuffle=True,
                                             num_workers=3)

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()

    #lr, weight decay user param
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.01,
                                 weight_decay=5e-4)
    #gradient clipping, grad check

    print('Model training started!')
    train(model, train_loader, dev_loader, optimizer, 200, 'trained_models',
          encoder, args.gpuid)
Code example #4
0
def main():
    """imSitu VSRL training entry point (MAC network with verb prediction).

    Supports several training regimes selected by mutually-exclusive
    flags: train role only (CNN/verb frozen), finetune verb, finetune
    CNN, resume a full checkpoint, or train everything from scratch.
    The chosen regime determines which parameter groups the optimizer
    updates (via ``utils.get_optimizer_single``) and the saved model name.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters.
    # NOTE(review): batch_size is never used — the DataLoaders below
    # hard-code batch sizes of 64 (train/dev) and 8 (traindev).
    batch_size = 640
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    print(
        'model spec :, mac net v pred for training and loss calc normalizing from only matching role count '
    )

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = mac_model_with_verb.E2ENetwork(encoder, args.gpuid)

    # To group up the features
    cnn_features, verb_features, role_features = utils.group_features_single(
        model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=64,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=64,
                                             shuffle=True,
                                             num_workers=n_worker)

    # traindev intentionally re-reads dev.json (small eval proxy set).
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    # Freeze everything first; the selected regime decides which groups
    # the optimizer will actually update (optimizer_select code).
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer_single(lr, weight_decay, optimizer_select,
                                           cnn_features, verb_features,
                                           role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()
    '''optimizer = torch.optim.Adam([{'params': model.conv.parameters(), 'lr': 5e-5},
                                  {'params': model.verb.parameters(), 'lr': 5e-5},
                                  {'params': model.role_labeller.parameters()}],
                                 lr=1e-3)'''

    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    #gradient clipping, grad check
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, args.output_dir, encoder, args.gpuid, clip_norm,
          lr_max, model_name, args)
Code example #5
0
def main():
    """imSitu VSRL entry point (MAC network + relation network model).

    Selects a training regime from mutually-exclusive flags (role-only,
    verb finetune, CNN finetune, resume, or from scratch), then either
    evaluates on dev (--evaluate), evaluates on test (--test), or trains.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    #parser.add_argument("--command", choices = ["train", "eval", "resume", 'predict'], required = True)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--verb_module', type=str, default='', help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--finetune_verb', action='store_true', help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    #todo: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters.
    # NOTE(review): batch_size is never used — the DataLoaders below
    # hard-code batch sizes of 64 and 8.
    batch_size = 640
    #lr = 5e-6
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    print('model spec :, mac net v + rn ')

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_mac_with_rn.E2ENetwork(encoder, args.gpuid)

    # To group up the features
    cnn_features, verb_features, role_features = utils.group_features_single(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/test.json") as f:
        test_set = json.load(f)
    test_set = imsitu_loader(imgset_folder, test_set, encoder, model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True, num_workers=n_worker)

    # traindev intentionally re-reads dev.json (small eval proxy set).
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything first; the selected regime decides which groups
    # the optimizer will actually update.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print('CNN finetune, Verb finetune, train role from the scratch from: {}'.format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb], ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    # NOTE(review): this optimizer is unconditionally overwritten by the
    # hand-built Adam a few lines below, making optimizer_select dead for
    # optimization purposes — confirm which optimizer is intended.
    optimizer = utils.get_optimizer_single(lr,weight_decay,optimizer_select,
                                           cnn_features, verb_features, role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()

    # Per-parameter-group learning rates: slow CNN, default verb/role.
    optimizer = torch.optim.Adam([{'params': model.conv.parameters(), 'lr': 5e-5},
                                  {'params': model.verb.parameters()},
                                  {'params': model.role_labeller.parameters()}],
                                 lr=1e-3)

    #optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    #gradient clipping, grad check
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid, write_to_file = True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        # NOTE(review): six scores are summed but divided by 8 — confirm
        # whether the denominator should be 6.
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"]
        avg_score /= 8

        print ('Dev average :{:.2f} {} {}'.format( avg_score*100,
                                                   utils.format_dict(top1_avg,'{:.2f}', '1-'),
                                                   utils.format_dict(top5_avg, '{:.2f}', '5-')))

        #write results to csv file
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict

        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)

        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)

        print('Writing predictions to file completed !')

    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid, write_to_file = True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"]
        avg_score /= 8

        print ('Test average :{:.2f} {} {}'.format( avg_score*100,
                                                    utils.format_dict(top1_avg,'{:.2f}', '1-'),
                                                    utils.format_dict(top5_avg, '{:.2f}', '5-')))


    else:

        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler, n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max, model_name, args)
Code example #6
0
def main():
    """imSitu VSRL training entry point (verb-embedding RelationNetworks).

    Supports resume-from-checkpoint and a verb finetune regime where the
    CNN and verb fc are loaded pretrained; only the classifier and verb
    modules are made trainable, each with its own learning rate.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument(
        "--command",
        choices=["train", "eval", "resume", 'predict', 'finetune'],
        required=True)
    parser.add_argument("--batch_size", '-b', type=int, default=64)
    parser.add_argument("--weights_file", help="the model to start from")
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='verb classifier train from the scratch, all others fixed')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')

    args = parser.parse_args()

    # Hyper-parameters. lr/lr_gamma/lr_step/weight_decay are kept for
    # reference; the optimizer below uses explicit per-group rates.
    batch_size = args.batch_size
    #lr = 1e-5
    lr = 1e-4
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-5
    n_epoch = 500
    n_worker = 4

    # print('LR scheme : lr decay, vgg, fc as per gnn paper batch 64', 1e-5, 0.1,25)

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    model_dir = 'trained_models'

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_verb_embd.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=n_worker)

    # traindev intentionally re-reads dev.json (small eval proxy set).
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))
    elif args.finetune_verb:
        print(
            'CNN fix, Verb fc fixed, train verb classifier layer from the scratch from: {}'
            .format(args.verb_module))
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])

    #print(model)
    if args.gpuid >= 0:
        print('GPU enabled')
        model.cuda()

    # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Freeze everything, then re-enable only the classifier and verb
    # modules; each gets its own learning rate below.
    utils.set_trainable(model, False)
    utils.set_trainable_param(model.classifier.parameters(), True)
    utils.set_trainable_param(model.verb.parameters(), True)
    optimizer = torch.optim.Adam([{
        'params': model.classifier.parameters(),
        'lr': 1e-3
    }, {
        'params': model.verb.parameters(),
        'lr': 5e-5
    }])
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    '''optimizer = utils.CosineAnnealingWR(0.01,1200000 , 50,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))'''

    #gradient clipping, grad check

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, model_dir, encoder, args.gpuid, clip_norm,
          lr_max)
Code example #7
0
def main():
    """imSitu VSRL training entry point (big updated RelationNetworks).

    Builds train/dev/traindev loaders from `imsitu_data`, optionally
    resumes from a checkpoint, and trains with Adam + StepLR.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument("--command",
                        choices=["train", "eval", "resume", 'predict'],
                        required=True)
    parser.add_argument("--weights_file", help="the model to start from")

    args = parser.parse_args()

    # Hyper-parameters.
    # NOTE(review): batch_size is never used — DataLoaders hard-code 4/8.
    batch_size = 640
    lr = 5e-6
    lr_max = 5e-4
    # NOTE(review): StepLR gamma > 1 means the LR is *multiplied* by 2
    # every lr_step epochs, i.e. it grows — confirm this is intended.
    lr_gamma = 2
    lr_step = 10
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imsitu_data'
    imgset_folder = 'of500_images_resized'

    # Fix: close annotation files deterministically instead of leaking
    # the handles from json.load(open(...)).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_updated_big.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=4,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=4,
                                             shuffle=True,
                                             num_workers=n_worker)

    with open(dataset_folder + "/train_eval.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        #print('GPU enabled')
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_gamma)
    #gradient clipping, grad check

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, 'trained_models', encoder, args.gpuid, clip_norm,
          lr_max)
Code example #8
0
def main():
    """Train or evaluate the agent-to-verb imSitu model.

    CLI flags select which pretrained sub-modules to load and which
    parameter groups to train; ``--evaluate`` / ``--test`` run inference
    on the dev / test split and dump prediction JSON files instead of
    training.
    """
    import argparse
    parser = argparse.ArgumentParser(description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, help="put GPU id > -1 in GPU mode", type=int)
    parser.add_argument('--resume_training', action='store_true', help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='', help='The model we resume')
    parser.add_argument('--agent_module', type=str, default='', help='pretrained agent module')
    parser.add_argument('--train_role', action='store_true', help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument('--train_verb', action='store_true', help='cnn fix, agent fix, verb train from the scratch')
    parser.add_argument('--finetune_agent', action='store_true', help='cnn fix, agent finetune, verb train from the scratch')
    parser.add_argument('--finetune_cnn', action='store_true', help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models', help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true', help='Only use the testing mode')
    parser.add_argument('--test', action='store_true', help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu', help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256', help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str, help='Location of output from detectron')
    # TODO: train role module separately with gt verbs

    args = parser.parse_args()

    # Hyper-parameters.  NOTE(review): batch_size, lr, lr_gamma, lr_step and
    # weight_decay are effectively unused here — the loaders and the Adamax
    # optimizer below hard-code their own values.
    batch_size = 640
    lr = 0.0001
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec :, verb role with context ')

    # Use context managers so the annotation files are closed promptly
    # (the original json.load(open(...)) leaked the file handles).
    with open(dataset_folder + "/updated_train_new.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_agent2verb.BaseModel(encoder, args.gpuid)

    # Group parameters so the optimizer can treat CNN / agent / verb
    # features differently (all verb and role feats are grouped under
    # role as it's a single unit).
    cnn_agent_features, cnn_verb_features, agent_features, verb_features = utils.group_features_agent2verb(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder, model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder, model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set, batch_size=64, shuffle=True, num_workers=n_worker)

    with open(dataset_folder + "/test.json") as f:
        test_set = json.load(f)
    test_set = imsitu_loader(imgset_folder, test_set, encoder, model.dev_preprocess())
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True, num_workers=n_worker)

    # "traindev" re-uses the dev annotations with a smaller batch size
    # (periodic evaluation during training).
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder, model.dev_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set, batch_size=8, shuffle=True, num_workers=n_worker)

    # Freeze everything first; the selected branch below decides what to
    # load and what to train.
    utils.set_trainable(model, False)

    if args.train_verb:
        print('CNN fix, agent fix, train verb from the scratch from: {}'.format(args.agent_module))
        args.train_all = False
        if len(args.agent_module) == 0:
            raise Exception('[pretrained agent module] not specified')
        utils.load_net(args.agent_module, [model.conv_agent, model.agent], ['conv', 'agent'])
        optimizer_select = 1
        model_name = 'cfx_afx_vtrain'

    elif args.finetune_agent:
        print('CNN fix, agent finetune, train verb from the scratch from: {}'.format(args.agent_module))
        args.train_all = True
        if len(args.agent_module) == 0:
            raise Exception('[pretrained agent module] not specified')
        utils.load_net(args.agent_module, [model.conv, model.agent], ['conv', 'agent'])
        optimizer_select = 2
        model_name = 'cfx_aft_vtrain'

    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer_agent2verb(lr,weight_decay,optimizer_select,
                                               cnn_agent_features, cnn_verb_features, agent_features, verb_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # Seed before any CUDA work for reproducibility.
    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    # NOTE(review): this overrides the optimizer selected above — the
    # get_optimizer_agent2verb result is discarded and a hard-coded Adamax
    # over the verb branch is used instead.  Kept as-is to preserve the
    # original behavior; confirm which one is intended.
    optimizer = torch.optim.Adamax([{'params': cnn_verb_features, 'lr': 5e-5},
                                    {'params': verb_features}],
                                   lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        # Evaluate on dev and dump per-role / failed-agent predictions to JSON.
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid, write_to_file = True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        # Mean of the 8 standard imSitu metrics.
        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print ('Dev average :{:.2f} {} {}'.format( avg_score*100,
                                                   utils.format_dict(top1_avg,'{:.2f}', '1-'),
                                                   utils.format_dict(top5_avg, '{:.2f}', '5-')))

        # Write prediction breakdowns to csv/json files for error analysis.
        role_dict = top1.role_dict
        fail_agent = top1.fail_agent

        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)

        with open('fail_agent.json', 'w') as fp:
            json.dump(fail_agent, fp, indent=4)

        print('Writing predictions to file completed !')

    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid, write_to_file = True)

        top1_avg = top1.get_average_results()
        top5_avg = top5.get_average_results()

        avg_score = top1_avg["verb"] + top1_avg["value"] + top1_avg["value-all"] + top5_avg["verb"] + \
                    top5_avg["value"] + top5_avg["value-all"] + top5_avg["value*"] + top5_avg["value-all*"]
        avg_score /= 8

        print ('Test average :{:.2f} {} {}'.format( avg_score*100,
                                                    utils.format_dict(top1_avg,'{:.2f}', '1-'),
                                                    utils.format_dict(top5_avg, '{:.2f}', '5-')))

    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, traindev_loader, optimizer, scheduler, n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max, model_name, args)
# ---- Code example #9 ----
def main():
    """Train the imSitu verb model with triplet-grouped batches.

    Parses CLI options, builds the encoder and data loaders (the train
    loader groups images via ``--verb_group_file`` so batches can form
    triplets), optionally resumes from ``--weights_file``, then trains.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument("--command",
                        choices=["train", "eval", "resume", 'predict'],
                        required=True)
    parser.add_argument("--batch_size", '-b', type=int, default=16)
    parser.add_argument("--weights_file", help="the model to start from")
    parser.add_argument("--verb_group_file",
                        help="csv containing most probable words for triplets")
    parser.add_argument(
        '--margin',
        type=float,
        default=0.2,
        # fixed: original help string was missing the closing parenthesis
        help='the margin value for the triplet loss function (default: 0.2)')

    args = parser.parse_args()

    batch_size = args.batch_size
    # NOTE(review): lr, lr_gamma, lr_step and weight_decay are unused here —
    # the Adam optimizer below hard-codes its own learning rates.
    lr = 1e-4
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-5
    n_epoch = 500
    n_worker = 4

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'
    model_dir = 'trained_models'

    # Use context managers so the annotation files are closed promptly
    # (json.load(open(...)) leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_verb_tuan.RelationNetworks(encoder, args.gpuid)

    train_set = imsitu_triplet_loader(imgset_folder, train_set, encoder,
                                      args.verb_group_file,
                                      model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.dev_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=n_worker)

    # "traindev" re-uses the dev annotations with train-time preprocessing.
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    if args.command == "resume":
        print("loading model weights...")
        model.load_state_dict(torch.load(args.weights_file))

    if args.gpuid >= 0:
        model.cuda()

    # Smaller lr for the pretrained conv trunk, larger for the verb head.
    optimizer = torch.optim.Adam([{
        'params': model.conv.parameters(),
        'lr': 5e-5
    }, {
        'params': model.verb.parameters()
    }],
                                 lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, optimizer,
          scheduler, n_epoch, model_dir, encoder, args.gpuid, clip_norm,
          lr_max)
def main():
    """Train the self-attention verb/role imSitu model.

    CLI flags pick which pretrained sub-modules to load and which
    parameter groups to train; the learning rate is driven by a Noam
    warmup schedule (``utils.NoamOpt``).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid",
                        default=-1,
                        help="put GPU id > -1 in GPU mode",
                        type=int)
    parser.add_argument('--resume_training',
                        action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model',
                        type=str,
                        default='',
                        help='The model we resume')
    parser.add_argument('--verb_module',
                        type=str,
                        default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role',
                        action='store_true',
                        help='cnn fix, verb fix, role train from the scratch')
    parser.add_argument(
        '--finetune_verb',
        action='store_true',
        help='cnn fix, verb finetune, role train from the scratch')
    parser.add_argument(
        '--finetune_cnn',
        action='store_true',
        help='cnn finetune, verb finetune, role train from the scratch')
    parser.add_argument('--output_dir',
                        type=str,
                        default='./trained_models',
                        help='Location to output the model')
    # TODO: train role module separately with gt verbs

    args = parser.parse_args()

    # NOTE(review): batch_size is unused — the loaders hard-code 24/8 below.
    batch_size = 640
    # lr=0 base rate for get_optimizer; presumably NoamOpt drives the
    # effective rate per step — TODO confirm against utils.NoamOpt.
    lr = 0
    lr_max = 5e-4
    lr_gamma = 0.1
    lr_step = 25
    clip_norm = 50
    weight_decay = 1e-4
    n_epoch = 500
    n_worker = 3

    dataset_folder = 'imSitu'
    imgset_folder = 'resized_256'

    print(
        'model spec :, 256 hidden, 1e-4 init lr, 25 epoch decay, 4 layer mlp for g,2mlp f1, 3 att layers with res connections param init xavier uni 2 heads dropout 0.5 mask 6loss maskb4g transformopt'
    )

    # Use context managers so the annotation files are closed promptly
    # (json.load(open(...)) leaked the file handles).
    with open(dataset_folder + "/train.json") as f:
        train_set = json.load(f)
    encoder = imsitu_encoder(train_set)

    model = model_vsrl_finetune_selfatt_ff.RelationNetworks(
        encoder, args.gpuid)

    # Group parameters for per-component optimizer treatment.
    cnn_features, verb_features, role_features = utils.group_features(model)

    train_set = imsitu_loader(imgset_folder, train_set, encoder,
                              model.train_preprocess())
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=24,
                                               shuffle=True,
                                               num_workers=n_worker)

    with open(dataset_folder + "/dev.json") as f:
        dev_set = json.load(f)
    dev_set = imsitu_loader(imgset_folder, dev_set, encoder,
                            model.train_preprocess())
    dev_loader = torch.utils.data.DataLoader(dev_set,
                                             batch_size=24,
                                             shuffle=True,
                                             num_workers=n_worker)

    # "traindev" re-uses the dev annotations with a smaller batch size.
    with open(dataset_folder + "/dev.json") as f:
        traindev_set = json.load(f)
    traindev_set = imsitu_loader(imgset_folder, traindev_set, encoder,
                                 model.train_preprocess())
    traindev_loader = torch.utils.data.DataLoader(traindev_set,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=n_worker)

    # Freeze everything first; the selected branch below decides what to
    # load and what to train.
    utils.set_trainable(model, False)
    if args.train_role:
        print('CNN fix, Verb fix, train role from the scratch from: {}'.format(
            args.verb_module))
        args.train_all = False
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 1
        model_name = 'cfx_vfx_rtrain'

    elif args.finetune_verb:
        print('CNN fix, Verb finetune, train role from the scratch from: {}'.
              format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 2
        model_name = 'cfx_vft_rtrain'

    elif args.finetune_cnn:
        print(
            'CNN finetune, Verb finetune, train role from the scratch from: {}'
            .format(args.verb_module))
        args.train_all = True
        if len(args.verb_module) == 0:
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.verb_module, [model.conv, model.verb],
                       ['conv', 'verb'])
        optimizer_select = 3
        model_name = 'cft_vft_rtrain'

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            # NOTE(review): message copied from the verb-module branches;
            # should probably say [resume_model] — kept as-is.
            raise Exception('[pretrained verb module] not specified')
        utils.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from the scratch.')
        optimizer_select = 0
        args.train_all = True
        model_name = 'train_full'

    optimizer = utils.get_optimizer(lr, weight_decay, optimizer_select,
                                    cnn_features, verb_features, role_features)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    if args.gpuid >= 0:
        model.cuda()

    # Noam warmup wrapper (model dim 256, factor 1, 4000 warmup steps) is
    # what train() actually steps; the StepLR below is built on the inner
    # optimizer — NOTE(review): confirm both are meant to be active.
    opt = utils.NoamOpt(256, 1, 4000, optimizer)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_step,
                                                gamma=lr_gamma)

    print('Model training started!')
    train(model, train_loader, dev_loader, traindev_loader, opt, scheduler,
          n_epoch, args.output_dir, encoder, args.gpuid, clip_norm, lr_max,
          model_name, args)