Example #1
 def __init__(self,
              args,
              ques_feat_size,
              image_feature_size,
              lidar_feature_size,
              num_classes,
              qa=None,
              encoder='lstm',
              grouping='single_scale'):
     super(MCB_LIDAR, self).__init__()
     self.qa = qa
     self.image_feat_size = image_feature_size
     self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
     N = len(self.vocab['question_token_to_idx'])
     D = 200
     padding = self.vocab['question_token_to_idx']['<NULL>']
     self.embeddings = nn.Embedding(N, D, padding_idx=padding)
     self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                           encoder)
     self.attention = MCBAttention(ques_feat_size, 512, 512, 8000)
     self.classifier = AnswerModule(8000 + ques_feat_size + 1024,
                                    num_classes, (256, ),
                                    use_batchnorm=True,
                                    dropout=0.5)  #3584 if method is concat
     self.mcb = MCBPolling(512, 8000, n_modalities=2)
     self.linweights = nn.Linear(ques_feat_size, 7)
     self.grouping = grouping
     if self.grouping == 'single_scale':
         self.lidar_module = LidarSsgModule(normal_channel=False)
     if self.grouping == 'multi_scale':
         self.lidar_module = LidarMsgModule(normal_channel=False)
Example #2
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm'):
        super(LSTM_BASIC, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)

        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                              encoder)
        self.classifier = AnswerModule(ques_feat_size, num_classes, (256, ))
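
AnswerModule is used by every example with the call shape AnswerModule(input_dim, num_classes, hidden_dims, use_batchnorm=..., dropout=...), but its definition is not among the snippets. A minimal stand-in, assuming it is an ordinary MLP classifier head (hypothetical; the repository's real module may differ):

import torch.nn as nn

class AnswerModule(nn.Module):
    # Hypothetical stand-in matching the call sites in these examples.
    def __init__(self, in_dim, num_classes, hidden_dims=(),
                 use_batchnorm=False, dropout=0.0):
        super(AnswerModule, self).__init__()
        layers = []
        prev = in_dim
        for h in hidden_dims:
            layers.append(nn.Linear(prev, h))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(h))
            layers.append(nn.ReLU(inplace=True))
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            prev = h
        layers.append(nn.Linear(prev, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)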
Example #3
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 method='concat'):
        super(MCB, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                              encoder)
        self.attention = MCBAttention(ques_feat_size, 512, 512, 8000)
        self.classifier = AnswerModule(8000,
                                       num_classes, (256, ),
                                       use_batchnorm=True,
                                       dropout=0.5)  #3584 if method is concat
        self.mcb = MCBPolling(512, 8000, n_modalities=2)
        self.linweights = nn.Linear(ques_feat_size, 7)
        self.method = method
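
Every example builds its question embeddings the same way: read the vocabulary size from the loaded vocab, use a 200-dimensional embedding, and register the <NULL> token as padding_idx so its vector stays at zero and receives no gradient. The pattern in isolation, assuming vocab is the dict returned by load_vocab:

import torch.nn as nn

# vocab is assumed to contain a 'question_token_to_idx' mapping
# with a '<NULL>' entry, as produced by load_vocab.
N = len(vocab['question_token_to_idx'])  # vocabulary size
D = 200                                  # embedding dimension used throughout
padding = vocab['question_token_to_idx']['<NULL>']
embeddings = nn.Embedding(N, D, padding_idx=padding)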
Example #4
    def __init__(self, args, ques_feat_size, image_feature_size,
                 lidar_feature_size, num_classes, qa=None, encoder='lstm',
                 method='concat'):
        super(MUTAN, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings, encoder)
        self.opt = dict(
            dim_v=512,
            dim_q=1024,
            attention=dict(
                nb_glimpses=2,
                dim_hv=310,
                dim_hq=310,
                dim_mm=510,
                R=5,
                dropout_v=0.5,
                dropout_q=0.5,
                dropout_mm=0.5,
                activation_v="tanh",
                activation_q="tanh",
                dropout_hv=0,
                dropout_hq=0),
            fusion=dict(
                dim_hv=620,
                dim_hq=310,
                dim_mm=510,
                R=5,
                dropout_v=0.5,
                dropout_q=0.5,
                activation_v="tanh",
                activation_q="tanh",
                dropout_hv=0,
                dropout_hq=0)
        )
        self.attention = MutanAtt(self.opt)
        self.classifier = AnswerModule(self.opt['fusion']['dim_mm'],
                                       num_classes, (),
                                       use_batchnorm=True,
                                       dropout=0.5)  #3584 if method is concat
        self.linweights = nn.Linear(ques_feat_size, 7)
        self.method = method
Example #5
    def __init__(self, args, ques_feat_size, image_feature_size,
                 lidar_feature_size, num_classes, qa=None, encoder='lstm',
                 grouping='single_scale'):
        super(MUTAN_LIDAR, self).__init__()
        self.qa = qa
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings, encoder)
        self.opt = dict(
            dim_v=512,
            dim_q=1024,
            attention=dict(
                nb_glimpses=2,
                dim_hv=310,
                dim_hq=310,
                dim_mm=510,
                R=5,
                dropout_v=0.5,
                dropout_q=0.5,
                dropout_mm=0.5,
                activation_v="tanh",
                activation_q="tanh",
                dropout_hv=0,
                dropout_hq=0),
            fusion=dict(
                dim_hv=620,
                dim_hq=310,
                dim_mm=510,
                R=5,
                dropout_v=0.5,
                dropout_q=0.5,
                activation_v="tanh",
                activation_q="tanh",
                dropout_hv=0,
                dropout_hq=0)
        )
        self.attention = MutanAtt(self.opt)
        self.classifier = AnswerModule(
            self.opt['fusion']['dim_mm'] + 1024 + ques_feat_size,
            num_classes, (),
            use_batchnorm=True,
            dropout=0.5)  #3584 if method is concat
        self.linweights = nn.Linear(ques_feat_size, 7)

        self.grouping = grouping
        if self.grouping == 'single_scale':
            self.lidar_module = LidarSsgModule(normal_channel=False)
        if self.grouping == 'multi_scale':
            self.lidar_module = LidarMsgModule(normal_channel=False)
Example #6
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 method='concat'):
        super(DAN, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size,
                                              self.embeddings,
                                              encoder,
                                              bidirectional=True,
                                              give_last=False)
        self.attention = DANAttention(ques_feat_size, 512, 512, 2)
        self.linweights = nn.Linear(ques_feat_size, 7)
        self.method = method
        self.softmax = nn.Softmax(dim=1)
        self.tanh = nn.Tanh()
        if self.method == 'concat':
            self.classifier = AnswerModule(
                2 * 512 * 7 + ques_feat_size,
                num_classes, (256, ),
                use_batchnorm=True,
                dropout=0.5)  #3584 if method is concat
        else:
            self.classifier = AnswerModule(
                2 * 512, num_classes, (256, ), use_batchnorm=True,
                dropout=0.5)  #3584 if method is concat
Example #7
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 method='concat'):
        super(SAN, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                              encoder)
        self.attention = StackedAttention(ques_feat_size, 512, 512, 2)
        self.method = method
        if self.method == 'concat':
            self.classifier = AnswerModule(
                3584, num_classes, (256, ), use_batchnorm=True,
                dropout=0.5)  #3584 if method is concat
        if self.method == 'hierarchical':
            self.classifier = AnswerModule(
                512, num_classes, (256, ), use_batchnorm=True,
                dropout=0.5)  #3584 if method is concat
        # self.linweights=nn.Linear(512,7)
        self.linweights = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 7),  # number of glimpses
            nn.Sigmoid())
Example #8
    def __init__(self, args, ques_feat_size, image_feature_size,
                 lidar_feature_size, num_classes, qa=None, encoder='lstm',
                 grouping='single_scale'):
        super(MLB_LIDAR, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings, encoder)
        self.opt = dict(
            dim_v=512,
            dim_q=1024,
            attention=dict(
                nb_glimpses=4,
                dim_h=1200,
                dropout_v=0.5,
                dropout_q=0.5,
                dropout_mm=0.5,
                activation_v="tanh",
                activation_q="tanh",
                activation_mm="tanh"),
            fusion=dict(
                dim_h=1200,
                dropout_v=0.5,
                dropout_q=0.5,
                activation_v="tanh",
                activation_q="tanh")
        )
        self.attention = MLBAtt(self.opt)
        self.classifier = AnswerModule(
            self.opt['fusion']['dim_h'] * self.opt['attention']['nb_glimpses'] +
            ques_feat_size + 1024,
            num_classes, (),
            use_batchnorm=True,
            dropout=0.5)  #3584 if method is concat
        self.linweights = nn.Linear(ques_feat_size, 7)
        self.lidar_module = LidarSsgModule(normal_channel=False)
Example #9
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 grouping='single_scale'):
        super(MFB_LIDAR, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size,
                                              self.embeddings,
                                              encoder,
                                              give_last=False)
        self.attention = CoAtt(ques_feat_size, image_feature_size, 2)
        self.classifier = AnswerModule(
            500 + ques_feat_size + 1024,
            num_classes, (256, ),
            use_batchnorm=True,
            dropout=0.5)  #3500 if method is concat else 500
        self.linweights = nn.Linear(ques_feat_size, 7)
        self.grouping = grouping
        if self.grouping == 'single_scale':
            self.lidar_module = LidarSsgModule(normal_channel=False)
        if self.grouping == 'multi_scale':
            self.lidar_module = LidarMsgModule(normal_channel=False)
Example #10
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 method='dot'):
        super(CNN_LSTM, self).__init__()
        self.qa = qa

        # special_words = ["<UNK>"]
        # self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        # word_vectors = get_word2vec(os.path.join(args.input_base, args.ques_vectors))

        # padding = vocab['question_token_to_idx']['<NULL>']
        # D = word2vec.vector_size
        # self.embeddings = get_embeddings(self.vocab, word_vectors, special_words)
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                              encoder)
        self.image_features_resize = nn.Linear(self.image_feat_size,
                                               ques_feat_size)
        self.method = method
        if self.method == 'dot':
            self.classifier = AnswerModule(ques_feat_size,
                                           num_classes,
                                           use_batchnorm=True,
                                           dropout=0.5)
        if self.method == 'concat':
            self.classifier = AnswerModule(ques_feat_size +
                                           image_feature_size * 7,
                                           num_classes,
                                           use_batchnorm=True,
                                           dropout=0.5)
Example #11
    def __init__(self, **kwargs):
        if 'question_h5' not in kwargs:
            raise ValueError('Must give question_h5')
        if 'image_feature_h5' not in kwargs:
            raise ValueError('Must give image_feature_h5')

        if 'lidar_feature_h5' not in kwargs:
            raise ValueError('Must give lidar_feature_h5')

        image_feature_h5_path = kwargs.pop('image_feature_h5')
        load_lidar = kwargs.pop('load_lidar')
        lidar_feature_h5_path = kwargs.pop('lidar_feature_h5')
        vocab_path = kwargs.pop('vocab')
        vocab = load_vocab(vocab_path)
        question_h5_path = kwargs.pop('question_h5')
        print('Reading questions from ', question_h5_path)
        with h5py.File(question_h5_path, 'r') as question_h5:
            self.dataset = ArgoDataset(question_h5,
                                       image_feature_h5_path,
                                       lidar_feature_h5_path,
                                       vocab=vocab,
                                       load_lidar=load_lidar)
        kwargs['collate_fn'] = argo_collate
        super(ArgoDataLoader, self).__init__(self.dataset, **kwargs)
Example #12
    def __init__(self, **kwargs):
        if 'question_h5' not in kwargs:
            raise ValueError('Must give question_h5')
        if 'image_feature_h5' not in kwargs:
            raise ValueError('Must give image_feature_h5')

        if 'lidar_feature_h5' not in kwargs:
            raise ValueError('Must give lidar_feature_h5')

        image_feature_h5_path = kwargs.pop('image_feature_h5')

        # self.image_feature_h5 = h5py.File(image_feature_h5_path, 'r')
        # print('Reading image features from ', image_feature_h5_path)

        lidar_feature_h5_path = kwargs.pop('lidar_feature_h5')
        load_lidar = kwargs.pop('load_lidar')

        # print('Reading lidar features from ', lidar_feature_h5_path)
        # self.lidar_feature_h5 = h5py.File(lidar_feature_h5_path, 'r')

        # image
        # lidar

        vocab_path = kwargs.pop('vocab')
        vocab = load_vocab(vocab_path)
        question_h5_path = kwargs.pop('question_h5')
        # disable_lidar=kwargs.pop('disable_lidar')
        print('Reading questions from ', question_h5_path)
        with h5py.File(question_h5_path, 'r') as question_h5:
            self.dataset = ArgoDataset(question_h5,
                                       image_feature_h5_path,
                                       lidar_feature_h5_path,
                                       vocab=vocab,
                                       load_lidar=load_lidar)
        kwargs['collate_fn'] = argo_collate

        # file is closed at this point, but all questions are stored in the dataset
        super(ArgoDataLoader, self).__init__(self.dataset, **kwargs)
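
Examples #11 and #12 are two revisions of the same loader: both pop their custom keys out of kwargs, build an ArgoDataset while the question HDF5 file is open, and forward the remaining keyword arguments (batch size, shuffling, workers) to the parent DataLoader. A usage sketch with hypothetical paths:

# All paths are placeholders; vocab and load_lidar are the keys the
# constructor pops before delegating the rest to DataLoader.
val_loader = ArgoDataLoader(question_h5='data/val_questions.h5',
                            image_feature_h5='data/image_features.h5',
                            lidar_feature_h5='data/lidar_features.h5',
                            vocab='data/vocab.json',
                            load_lidar=True,
                            batch_size=32,
                            shuffle=False)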
Example #13
    def __init__(self,
                 args,
                 ques_feat_size,
                 image_feature_size,
                 lidar_feature_size,
                 num_classes,
                 qa=None,
                 encoder='lstm',
                 method='concat',
                 grouping='single_scale'):
        super(LIDAR_MODEL, self).__init__()
        self.qa = qa
        self.image_feat_size = image_feature_size
        self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
        N = len(self.vocab['question_token_to_idx'])
        D = 200
        padding = self.vocab['question_token_to_idx']['<NULL>']
        self.embeddings = nn.Embedding(N, D, padding_idx=padding)
        self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                              encoder)
        self.method = method
        self.grouping = grouping

        if self.grouping == 'single_scale':
            self.lidar_module = LidarSsgModule(normal_channel=False)
        if self.grouping == 'multi_scale':
            self.lidar_module = LidarMsgModule(normal_channel=False)

        if self.method == 'dot':
            self.classifier = AnswerModule(ques_feat_size,
                                           num_classes,
                                           use_batchnorm=True,
                                           dropout=0.5)
        if self.method == 'concat':
            self.classifier = AnswerModule(ques_feat_size + 1024,
                                           num_classes,
                                           use_batchnorm=True,
                                           dropout=0.5)
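
The model constructors in these examples share the same leading signature, so instantiation is uniform. A sketch for LIDAR_MODEL, reusing the feature sizes from the visualization loop in Example #15 (512-d image features, 1024-d lidar features, 34 answer classes); args is assumed to carry input_base and vocab so the constructor can load the vocabulary:

# Sketch only: args.input_base and args.vocab must point at the vocab file.
model = LIDAR_MODEL(args,
                    ques_feat_size=512,
                    image_feature_size=512,
                    lidar_feature_size=1024,
                    num_classes=34,
                    encoder='lstm',
                    method='concat',         # classifier input: ques_feat_size + 1024
                    grouping='multi_scale')  # selects LidarMsgModule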
Example #14
 def __init__(self,
              args,
              ques_feat_size,
              image_feature_size,
              lidar_feature_size,
              num_classes,
              qa=None,
              encoder='lstm',
              grouping='single_scale'):
     super(SAN_LIDAR, self).__init__()
     self.qa = qa
     self.image_feat_size = image_feature_size
     self.vocab = load_vocab(os.path.join(args.input_base, args.vocab))
     N = len(self.vocab['question_token_to_idx'])
     D = 200
     padding = self.vocab['question_token_to_idx']['<NULL>']
     self.embeddings = nn.Embedding(N, D, padding_idx=padding)
     self.question_module = QuestionModule(ques_feat_size, self.embeddings,
                                           encoder)
     self.attention = StackedAttention(ques_feat_size, 512, 512, 2)
     self.classifier = AnswerModule(512,
                                    num_classes, (256, ),
                                    use_batchnorm=True,
                                    dropout=0.5)  #3584 if method is concat
     # self.linweights=nn.Linear(512,7)
     self.linweights = nn.Sequential(
         nn.Linear(512, 256),
         nn.ReLU(inplace=True),
         nn.Dropout(0.5),
         nn.Linear(256, 7),  # number of glimpses
         nn.Sigmoid())
     self.grouping = grouping
     if self.grouping == 'single_scale':
         self.lidar_module = LidarSsgModule(normal_channel=False)
     if self.grouping == 'multi_scale':
         self.lidar_module = LidarMsgModule(normal_channel=False)
Example #15
def visualize_loop(args, val_loader):

    image_feature_size = 512
    lidar_feature_size = 1024

    if args.model_type == 'SAN':
        question_feat_size = 512
        model = SAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MCB':
        question_feat_size = 512
        model = MCB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MFB':
        question_feat_size = 512
        # image_feature_size=512
        model = MFB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MLB':
        question_feat_size = 1024
        image_feature_size = 512
        model = MLB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MUTAN':
        question_feat_size = 1024
        image_feature_size = 512
        model = MUTAN(args,
                      question_feat_size,
                      image_feature_size,
                      lidar_feature_size,
                      num_classes=34,
                      qa=None,
                      encoder=args.encoder_type,
                      method='hierarchical')
    if args.model_type == 'DAN':
        question_feat_size = 512
        model = DAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')

    data = load_weights(args, model, optimizer=None)
    if isinstance(data, list):
        model, optimizer, start_epoch, loss, accuracy = data
        print("Loaded weights")
        print("Epoch: %d, loss: %.3f, Accuracy: %.4f " %
              (start_epoch, loss, accuracy),
              flush=True)
    else:
        print("Error occurred while loading model weights; aborting visualization")
        model = data
        return

    # Multi-GPU usage (optional):
    # if torch.cuda.device_count() > 1:
    #     print("Using ", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)

    model.to(device=args.device)
    model.eval()

    import argoverse
    from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
    from argoverse.utils.json_utils import read_json_file
    from argoverse.map_representation.map_api import ArgoverseMap

    vocab = load_vocab(os.path.join(args.input_base, args.vocab))
    argoverse_loader = ArgoverseTrackingLoader(
        '../../../Data/train/argoverse-tracking')

    k = 1
    with torch.no_grad():
        for data in tqdm(val_loader):
            question, image_feature, ques_lengths, point_set, answer, image_name = data
            question = question.to(device=args.device)
            ques_lengths = ques_lengths.to(device=args.device)
            image_feature = image_feature.to(device=args.device)
            point_set = point_set.to(device=args.device)

            pred, wgt, energies = model(question, image_feature, ques_lengths,
                                        point_set)

            question = question.cpu().data.numpy()
            answer = answer.cpu().data.numpy()
            pred = F.softmax(pred, dim=1)
            pred = torch.argmax(pred, dim=1)
            pred = np.asarray(pred.cpu().data)
            wgt = wgt.cpu().data.numpy()
            energies = energies.squeeze(1).cpu().data.numpy()
            ques_lengths = ques_lengths.cpu().data.numpy()
            pat = re.compile(r'(.*)@(.*)')
            _, keep = np.where([answer == pred])
            temp_batch_size = question.shape[0]
            for b in range(temp_batch_size):
                q = get_ques(question[b], ques_lengths[b], vocab)
                ans = get_ans(answer[b])
                pred_ans = get_ans(pred[b])
                # print(q,ans)
                c = list(re.findall(pat, image_name[b]))[0]
                log_id = c[0]
                idx = int(c[1])
                print(k)
                argoverse_data = argoverse_loader.get(log_id)
                if args.model_type == 'SAN':
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MCB':
                    plot_att(argoverse_data, idx, wgt[b], energies[b], q, ans,
                             args.save_dir, k, pred_ans)
                if args.model_type == 'MFB':
                    plot_att(argoverse_data, idx, wgt[b, :, :, 1], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MLB':
                    plot_att(argoverse_data, idx, wgt[b, :, 3, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MUTAN':  #only two glimpses
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'DAN':  #only two memory
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)

                k = k + 1
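
To drive the loop above, args needs the fields the function reads (model_type, encoder_type, device, save_dir, plus input_base and vocab for load_vocab), and val_loader must yield the six-tuple unpacked at the top of the loop. A hypothetical wiring, with placeholder values:

import argparse

# Hypothetical driver; all values below are placeholders.
args = argparse.Namespace(model_type='SAN', encoder_type='lstm',
                          device='cuda:0', save_dir='attention_maps/',
                          input_base='data/', vocab='vocab.json')
val_loader = ArgoDataLoader(question_h5='data/val_questions.h5',
                            image_feature_h5='data/image_features.h5',
                            lidar_feature_h5='data/lidar_features.h5',
                            vocab='data/vocab.json',
                            load_lidar=True, batch_size=8, shuffle=False)
visualize_loop(args, val_loader)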