shuffle=False, num_workers=args.worker, pin_memory=True) args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype) if not os.path.exists(args.exp_dir): os.makedirs("%s/models" % args.exp_dir) if args.dataset == 'mscoco': input_dim = 49 else: input_dim = 81 if args.feature == 'tensor': audio_model = models.DavenetSmall(input_dim=input_dim, embedding_dim=512) image_model = models.NoOpEncoder(embedding_dim=512) if not args.only_eval: train(audio_model, image_model, train_loader, val_loader, args) else: evaluation(audio_model, image_model, val_loader, args) elif args.feature == 'vector': audio_model = models.CNN_RNN_ENCODER(input_dim=input_dim, embedding_dim=512, n_layer=3) image_model = models.LinearTrans(embedding_dim=512) if not args.only_eval: train(audio_model, image_model, train_loader, val_loader, args) else: evaluation_vector(audio_model, image_model, val_loader, args)
image_feat_type='rcnn'), batch_size=args.batch_size, shuffle=False, num_workers=args.worker, pin_memory=True) args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype) if not os.path.exists(args.exp_dir): os.makedirs("%s/models" % args.exp_dir) if args.feature == 'tensor': if args.dataset == 'mscoco': input_dim = 49 else: input_dim = 81 audio_model = models.DavenetSmall(input_dim=input_dim, embedding_dim=512) image_model = models.LinearTrans(input_dim=2048, embedding_dim=512) if not args.only_eval: train(audio_model, image_model, train_loader, val_loader, args) else: evaluation(audio_model, image_model, val_loader, args) elif args.feature == 'vector': audio_model = models.CNN_RNN_ENCODER(embedding_dim=512, n_layer=3) image_model = models.LinearTrans(embedding_dim=512) if not args.only_eval: train(audio_model, image_model, train_loader, val_loader, args) else: evaluation_vector(audio_model, image_model, val_loader, args)
# Driver fragment (whitespace-collapsed). It starts with the trailing arguments
# of a call that opens before this chunk -- presumably the construction of
# val_loader; TODO confirm against the preceding lines.
# Visible steps after that call:
#   1. args.exp_dir is re-rooted at <exp_dir>/<feature>/<losstype>.
#   2. "<exp_dir>/models" is created, but only when exp_dir itself is missing.
#      NOTE(review): if exp_dir already exists without a "models" subdirectory,
#      nothing is created -- confirm whether later code relies on that directory.
#   3. If args.precompute_acoustic_feature is set, the audio model is a
#      NoOpEncoder and all three models use size 1000; otherwise the audio
#      model is Davenet and the size is 1024. The image model is LinearTrans
#      with input_dim=2048 in both cases, paired with a DotProductAttention.
#   4. train_attention runs unless args.only_eval is set; with only_eval the
#      first branch calls evaluation_attention. The final `else:` body lies
#      beyond this chunk.
'val', max_nregions=15, image_feat_type='rcnn'), batch_size=args.batch_size, shuffle=False, num_workers=args.worker, pin_memory=True) args.exp_dir = os.path.join(args.exp_dir, args.feature, args.losstype) if not os.path.exists(args.exp_dir): os.makedirs("%s/models" % args.exp_dir) if args.precompute_acoustic_feature: audio_model = models.NoOpEncoder(embedding_dim=1000) image_model = models.LinearTrans(input_dim=2048, embedding_dim=1000) attention_model = models.DotProductAttention(in_size=1000) if not args.only_eval: train_attention(audio_model, image_model, attention_model, train_loader, val_loader, args) else: evaluation_attention(audio_model, image_model, attention_model, val_loader, args) else: audio_model = models.Davenet(embedding_dim=1024) image_model = models.LinearTrans(input_dim=2048, embedding_dim=1024) attention_model = models.DotProductAttention(in_size=1024) if not args.only_eval: train_attention(audio_model, image_model, attention_model, train_loader, val_loader, args) else: