예제 #1
0
        shuffle=False,
        num_workers=args.worker,
        pin_memory=True)

# Nest the experiment directory under the selected feature and loss type.
args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype)

# Always make sure the "models" sub-directory exists. Checking only
# args.exp_dir meant that an experiment directory which already existed
# without "models" inside it was never completed. exist_ok=True keeps
# repeated runs from failing when the directory is already there.
os.makedirs(os.path.join(args.exp_dir, "models"), exist_ok=True)

# Choose input_dim from the dataset name: 49 for 'mscoco', 81 otherwise.
input_dim = 49 if args.dataset == 'mscoco' else 81

# Build the audio/image encoder pair for the requested feature type, then
# either train or run evaluation only. Any other value of args.feature
# falls through without doing anything.
if args.feature in ('tensor', 'vector'):
    if args.feature == 'tensor':
        audio_model = models.DavenetSmall(input_dim=input_dim,
                                          embedding_dim=512)
        image_model = models.NoOpEncoder(embedding_dim=512)
        run_eval = evaluation
    else:
        audio_model = models.CNN_RNN_ENCODER(input_dim=input_dim,
                                             embedding_dim=512,
                                             n_layer=3)
        image_model = models.LinearTrans(embedding_dim=512)
        run_eval = evaluation_vector

    # Both feature types share the same training entry point; only the
    # evaluation routine differs.
    if args.only_eval:
        run_eval(audio_model, image_model, val_loader, args)
    else:
        train(audio_model, image_model, train_loader, val_loader, args)
예제 #2
0
                                                   image_feat_type='rcnn'),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.worker,
        pin_memory=True)

# Nest the experiment directory under the selected feature and loss type.
args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype)

# Always make sure the "models" sub-directory exists. Checking only
# args.exp_dir meant that an experiment directory which already existed
# without "models" inside it was never completed. exist_ok=True keeps
# repeated runs from failing when the directory is already there.
os.makedirs(os.path.join(args.exp_dir, "models"), exist_ok=True)

# Build the audio/image encoder pair for the requested feature type, then
# either train or run evaluation only. Any other value of args.feature
# falls through without doing anything.
if args.feature in ('tensor', 'vector'):
    if args.feature == 'tensor':
        # input_dim is only needed (and only set) for the tensor branch:
        # 49 for 'mscoco', 81 for every other dataset.
        input_dim = 49 if args.dataset == 'mscoco' else 81

        audio_model = models.DavenetSmall(input_dim=input_dim,
                                          embedding_dim=512)
        image_model = models.LinearTrans(input_dim=2048, embedding_dim=512)
        run_eval = evaluation
    else:
        audio_model = models.CNN_RNN_ENCODER(embedding_dim=512, n_layer=3)
        image_model = models.LinearTrans(embedding_dim=512)
        run_eval = evaluation_vector

    # Both feature types share the same training entry point; only the
    # evaluation routine differs.
    if args.only_eval:
        run_eval(audio_model, image_model, val_loader, args)
    else:
        train(audio_model, image_model, train_loader, val_loader, args)
예제 #3
0
                                                   'val',
                                                   max_nregions=15,
                                                   image_feat_type='rcnn'),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.worker,
        pin_memory=True)

# Nest the experiment directory under the selected feature and loss type.
args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype)

# Always make sure the "models" sub-directory exists. Checking only
# args.exp_dir meant that an experiment directory which already existed
# without "models" inside it was never completed. exist_ok=True keeps
# repeated runs from failing when the directory is already there.
os.makedirs(os.path.join(args.exp_dir, "models"), exist_ok=True)

if args.precompute_acoustic_feature:
    audio_model = models.NoOpEncoder(embedding_dim=1000)
    image_model = models.LinearTrans(input_dim=2048, embedding_dim=1000)
    attention_model = models.DotProductAttention(in_size=1000)
    if not args.only_eval:
        train_attention(audio_model, image_model, attention_model,
                        train_loader, val_loader, args)
    else:
        evaluation_attention(audio_model, image_model, attention_model,
                             val_loader, args)
else:
    audio_model = models.Davenet(embedding_dim=1024)
    image_model = models.LinearTrans(input_dim=2048, embedding_dim=1024)
    attention_model = models.DotProductAttention(in_size=1024)
    if not args.only_eval:
        train_attention(audio_model, image_model, attention_model,
                        train_loader, val_loader, args)
    else: