Example 1
    def forward(self, v, q):
        self.opt = config.parse_opt()  # note: re-parses options on every forward call
        w_emb = self.w_emb(q)
        #print(v.shape)
        q_emb = self.q_emb(w_emb)
        #print(q_emb.shape)
        v_emb = self.v_fc(v)

        #print(v_emb.shape)
        r_emb = self.r_emb(v_emb, q_emb)
        #print(r_emb.shape)
        w_att = self.v_att(v_emb, q_emb)
        v_att = v_emb * w_att
        vatt = torch.squeeze(torch.sum(v_att, 1, keepdim=True))

        wr_att = self.r_att(r_emb, q_emb)
        r_att = r_emb * wr_att
        ratt = torch.squeeze(torch.sum(r_att, 1, keepdim=True))
        #print(ratt.shape)

        joint_proj = vatt + q_emb
        #print(joint_proj.shape)
        joint_proj = joint_proj + ratt

        logits = self.classifier(joint_proj)
        return logits
Example 2
 def __init__(self):
     self.opt = config.parse_opt()
     self.CSV_TYPE={'FrameQA': '_frameqa_question.csv',
       'Count': '_count_question.csv',
       'Trans': '_transition_question.csv',
       'Action' : '_action_question.csv'
      }
Example 3
def load_dataset(mode, num=None):
    if mode == 'Train':
        textset, _ = _read_from_csv()
    else:
        _, textset = _read_from_csv()
    entries = []
    opt = config.parse_opt()
    hdf5_result = json.load(open(opt.HDF5_JSON, 'r'))['results']  # unused in this variant (cf. Example 14)
    count = 0
    idx = list(textset.index)
    if num is None:
        num = len(idx) + 10
    for _, row in textset.iterrows():
        question = row['question']
        image_idx = idx[count]
        gif_name = row['gif_name']
        answer = row['answer']
        if count > num:
            break
        vid = str(row['key'])
        entry = {
            'question': question,
            'answer': answer,
            'gif_name': gif_name,
            'index': image_idx,
            'vid_id': vid,
        }
        entries.append(entry)
        count += 1
    return entries
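A minimal usage sketch for the loader above, assuming config and the CSV files from Example 7 are in place; the field names come straight from the entry dict, and the cutoff of 100 is arbitrary:

entries = load_dataset('Train', num=100)
for e in entries[:3]:
    print(e['vid_id'], e['question'], '->', e['answer'])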
Example 4
    def __init__(self):
        super(MFB, self).__init__()
        self.opt = config.parse_opt()
        self.dropout = nn.Dropout(self.opt.MFH_DROPOUT)
        self.v_proj = nn.Linear(self.opt.NUM_GLIMPSE * self.opt.NUM_HIDDEN,
                                self.opt.MFB_DIM * self.opt.POOLING_SIZE)
        self.q_proj = nn.Linear(self.opt.NUM_HIDDEN,
                                self.opt.MFB_DIM * self.opt.POOLING_SIZE)
        self.v_proj1 = nn.Linear(self.opt.NUM_GLIMPSE * self.opt.NUM_HIDDEN,
                                 self.opt.MFB_DIM * self.opt.POOLING_SIZE)
        self.q_proj1 = nn.Linear(self.opt.NUM_HIDDEN,
                                 self.opt.MFB_DIM * self.opt.POOLING_SIZE)
Example 5
 def __init__(self, hidden, mid, dropout):
     super(Attention, self).__init__()
     self.opt = config.parse_opt()
     self.v_proj = nn.Linear(hidden, mid)
     self.q_proj = nn.Linear(hidden, mid)
     self.att = nn.Linear(mid, 1)
     self.softmax = nn.Softmax(dim=1)  # explicit dim: attention weights over the frame axis
     self.dropout = nn.Dropout(dropout)
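This snippet omits the forward pass, but the commented-out lines in Example 12 show how these layers were meant to compose. A sketch reconstructed from those comments; the shapes are assumptions, and F is torch.nn.functional:

 def forward(self, v, q):
     # v: (batch, frames, hidden), q: (batch, hidden) -- assumed shapes
     v_proj = self.v_proj(v)                         # (batch, frames, mid)
     q_proj = torch.unsqueeze(self.q_proj(q), 1)     # (batch, 1, mid)
     vq_proj = F.relu(v_proj + q_proj)               # broadcast add over frames
     proj = torch.squeeze(self.att(vq_proj))         # (batch, frames)
     w_att = torch.unsqueeze(self.softmax(proj), 2)  # (batch, frames, 1)
     return w_att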
Example 6
def train(model, train_loader, test_loader, opt):
    opt = config.parse_opt()  # note: shadows the opt argument passed in
    optim = optimizer.get_std_opt(model, opt)
    num_epochs = opt.EPOCHS
    #optim=torch.optim.Adamax(model.parameters())
    logger = utils.Logger(
        os.path.join('./save_models', 'log' + str(opt.SAVE_NUM) + '.txt'))
    dict_file = load_pkl(opt.ANET_LABEL2ANS)
    log_hyperpara(logger, opt)
    best_eval_score = 0
    train_file = load_pkl(opt.ANET_TRAIN_DICT)

    for epoch in range(num_epochs):
        total_loss = 0
        train_score = 0.0
        t = time.time()
        t_time = 0.0
        for i, (conct, q, l, ques_id) in enumerate(train_loader):
            starttime = datetime.datetime.now()
            v = Variable(conct.float().cuda())  # the second .float().cuda() round-trip was redundant
            q = Variable(q).cuda()
            l = Variable(l).float().cuda()
            pred = model(v, q)
            loss = instance_bce_with_logits(pred, l)
            loss.backward()
            # clip_grad_norm was renamed clip_grad_norm_ in PyTorch 0.4
            nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optim.step()
            optim.zero_grad()
            batch_score = compute_score(pred, ques_id, train_file, dict_file)
            total_loss += loss.item() * v.size(0)  # .item() avoids keeping the graph alive
            train_score += batch_score
            print('Epoch:', epoch, 'batch:', i + 1, 'batch_score:', batch_score,
                  'loss:', loss.item())
            endtime = datetime.datetime.now()
            t_time += (endtime - starttime).total_seconds()  # .microseconds alone drops whole seconds
        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('time cost: %.2f' % t_time)
        total_loss /= len(train_loader.dataset)
        train_score = 100 * train_score / len(train_loader.dataset)
        model.eval()
        evaluate_score, test_loss, w0, w9 = evaluate(model, test_loader, opt,
                                                     epoch)
        w0 = w0 * 100
        w9 = w9 * 100
        print('Epoch:', epoch, 'evaluation w0:', w0, ' w9:', w9)
        logger.write('\twup0: %.2f, wup9: %.2f' % (w0, w9))
        print('Epoch:', epoch, 'evaluation score:', 100 * evaluate_score, ' loss:', test_loss)
        logger.write('\ttrain_loss: %.2f, accuracy: %.2f' %
                     (total_loss, train_score))
        logger.write('\teval accuracy: %.2f ' % (100 * evaluate_score))
        logger.write('\teval loss: %.2f ' % (test_loss))
        if evaluate_score > best_eval_score:
            best_eval_score = evaluate_score
        model.train()
    logger.write('best accuracy %.2f' % (100 * best_eval_score))
Example 7
def _read_from_csv():
    opt = config.parse_opt()
    train_path = os.path.join(opt.TEXT_DIR,
                              ('Train' + CSV_TYPE[opt.QUESTION_TYPE]))
    test_path = os.path.join(opt.TEXT_DIR,
                             ('Test' + CSV_TYPE[opt.QUESTION_TYPE]))
    text_train = pd.read_csv(train_path, sep='\t')
    text_test = pd.read_csv(test_path, sep='\t')
    text_train = text_train.set_index('vid_id')
    text_test = text_test.set_index('vid_id')
    return text_train, text_test
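Judging from the fields accessed in Examples 3 and 14, each tab-separated file presumably carries at least these columns (inferred from this code, not from any dataset spec):

# vid_id    gif_name    question    answer    key    a1..a5 (multiple-choice variants only)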
Example 8
def build_baseline(dataset):
    opt = config.parse_opt()
    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT,
                              opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN,
                           opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM, 1,
                                  opt.FC_DROPOUT)
    return BaseModel(w_emb, q_emb, v_att, classifier, v_emb)
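A hypothetical smoke test for this factory, assuming the forward(v, q) signature of Example 12; the batch size, 35-frame clip length, and 14-token question length are guesses, and dictionary is assumed to be a prebuilt Dictionary as in Example 19:

opt = config.parse_opt()
dataset = FeatureDataset('FrameQA', dictionary, 'Train')            # from Example 13
model = build_baseline(dataset).cuda()
v = torch.randn(8, 35, opt.C3D_SIZE + opt.RES_SIZE).cuda()          # 8 clips of stacked C3D+ResNet features
q = torch.randint(0, dataset.dictionary.ntokens(), (8, 14)).cuda()  # 8 tokenized questions
logits = model(v, q)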
Example 9
def build_baseline(dataset, opt):
    opt = config.parse_opt()  # note: shadows the opt argument passed in
    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT,
                              opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN,
                           opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    r_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    v_fc = Videofc(opt.GLIMPSE, opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN,
                   opt.FC_DROPOUT)
    a_emb = AnswerEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT,
                            opt.L_RNN_DROPOUT)
    rela_emb = Rela_Module(opt.NUM_HIDDEN * 3, opt.NUM_HIDDEN, opt.NUM_HIDDEN)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM, dataset.num_ans,
                                  opt.FC_DROPOUT)
    return BaseModel(w_emb, q_emb, v_emb, a_emb, v_att, v_fc, rela_emb, r_att,
                     classifier, opt)
Example 10
def main():
    opt = config.parse_opt()

    folder = os.path.join(config.OUTPUT_DIR, opt.ID + '_pred')
    log_file = os.path.join(config.LOG_DIR, opt.ID)

    logger = get_logger(log_file)

    check_mkdir(folder)

    pred(opt, folder, logger)

    visualize_pred(opt, folder, 'val', logger)
Example 11
    def __init__(self, hidden, mid, dropout):
        super(Gate_combine, self).__init__()
        self.opt = config.parse_opt()

        self.a_proj = nn.Linear(hidden, mid)
        self.a_att = nn.Linear(mid, 1)
        self.f_proj = nn.Linear(hidden, mid)
        self.f_att = nn.Linear(mid, 1)
        self.q_proj = nn.Linear(hidden, mid)
        self.q_att = nn.Linear(mid, 1)

        self.sig = nn.Sigmoid()
        self.dropout = nn.Dropout(dropout)
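No forward is given for Gate_combine. Given the paired proj/att layers and the sigmoid, one plausible gating scheme (purely an assumption, not the author's code) would be:

    def forward(self, a, f, q):
        # one scalar gate per modality: sigmoid(att(dropout(proj(x))))
        g_a = self.sig(self.a_att(self.dropout(self.a_proj(a))))
        g_f = self.sig(self.f_att(self.dropout(self.f_proj(f))))
        g_q = self.sig(self.q_att(self.dropout(self.q_proj(q))))
        return g_a * a + g_f * f + g_q * q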
Example 12
 def forward(self, v, q):
     self.opt = config.parse_opt()
     w_emb = self.w_emb(q)
     v_embedding = self.v_emb(v)
     q_emb = self.q_emb(w_emb)
     w_att = self.v_att(v_embedding, q_emb)
     #v_proj=self.v_proj(v_embedding)
     #q_proj=torch.unsqueeze(self.q_proj(q_emb),1)
     #vq_proj=F.relu(v_proj +q_proj)
     #proj=torch.squeeze(self.att(vq_proj))
     #w_att=torch.unsqueeze(self.softmax(proj),2)
     v_att = v_embedding * w_att
     vatt = torch.squeeze(torch.sum(v_att, 1, keepdim=True))
     joint_proj = vatt + q_emb
     logits = torch.squeeze(self.classifier(joint_proj))
     return logits
Example 13
 def __init__(self, question_type, dictionary, mode):
     super(FeatureDataset, self).__init__()
     self.opt = config.parse_opt()
     utils.assert_in_type(question_type)
     if question_type == 'FrameQA':
         self.ans2label = pkl.load(open('./data/ans2label.pkl', 'rb'))
         self.label2ans = pkl.load(open('./data/label2ans.pkl', 'rb'))
         self.num_ans = len(self.ans2label)
     self.dictionary = dictionary
     entry_path = './data/entries_' + str(mode) + '.pkl'  # computed but unused in this snippet
     print('Load Dataset')
     self.entries = load_dataset(mode)
     print('Dataset\'s length is %d' % (len(self.entries)))
     self.tokenize()
     self.read_from_h5py()
     self.tensorize()
Example 14
def load_dataset(mode, num=None):
    if mode == 'Train':
        textset, _ = _read_from_csv()
    else:
        _, textset = _read_from_csv()
    entries = []
    opt = config.parse_opt()
    hdf5_result = json.load(open(opt.HDF5_JSON, 'r'))['results']
    count = 0
    idx = list(textset.index)
    if num is None:
        num = len(idx) + 10
    for _, row in textset.iterrows():
        question = row['question']
        a1 = row['question'] + row['a1']
        a2 = row['question'] + row['a2']
        a3 = row['question'] + row['a3']
        a4 = row['question'] + row['a4']
        a5 = row['question'] + row['a5']
        image_idx = idx[count]
        gif_name = row['gif_name']
        answer = row['answer']

        if count > num:
            break
        vid = str(row['key'])
        proposal_info = hdf5_result[gif_name[2:]]
        entry = {
            'a1': a1,
            'a2': a2,
            'a3': a3,
            'a4': a4,
            'a5': a5,
            'question': question,
            'answer': answer,
            'gif_name': gif_name,
            'index': image_idx,
            'vid_id': vid,
            'proposal_info': proposal_info
        }
        entries.append(entry)
        count += 1
    return entries
Example 15
    def forward(self, v, q, a1, a2, a3, a4, a5):
        self.opt = config.parse_opt()
        w_emb = self.w_emb(q)
        emb1 = self.w_emb(a1)
        emb2 = self.w_emb(a2)
        emb3 = self.w_emb(a3)
        emb4 = self.w_emb(a4)
        emb5 = self.w_emb(a5)
        #print(v.shape)
        q_emb = self.q_emb(w_emb)
        #print(q_emb.shape)
        v_emb = self.v_fc(v)

        #print(v_emb.shape)
        r_emb = self.r_emb(v_emb, q_emb)
        #print(r_emb.shape)
        w_att = self.v_att(v_emb, q_emb)
        v_att = v_emb * w_att
        vatt = torch.squeeze(torch.sum(v_att, 1, keepdim=True))

        wr_att = self.r_att(r_emb, q_emb)
        r_att = r_emb * wr_att
        ratt = torch.squeeze(torch.sum(r_att, 1, keepdim=True))
        #print(ratt.shape)

        joint_proj = vatt + q_emb
        #print(joint_proj.shape)
        joint_proj = joint_proj + ratt

        cand1 = self.score(self.a_emb(emb1, joint_proj))
        cand2 = self.score(self.a_emb(emb2, joint_proj))
        cand3 = self.score(self.a_emb(emb3, joint_proj))
        cand4 = self.score(self.a_emb(emb4, joint_proj))
        cand5 = self.score(self.a_emb(emb5, joint_proj))

        score = torch.zeros(v.size(0), 5)  # CPU float32, equivalent to the former NumPy round-trip
        score[:, 0] = torch.squeeze(cand1)
        score[:, 1] = torch.squeeze(cand2)
        score[:, 2] = torch.squeeze(cand3)
        score[:, 3] = torch.squeeze(cand4)
        score[:, 4] = torch.squeeze(cand5)
        return score
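A hedged alternative to filling a preallocated score matrix column by column: if each cand_i has shape (batch, 1), torch.cat builds it in one step while staying on the model's device and inside the autograd graph:

score = torch.cat([cand1, cand2, cand3, cand4, cand5], dim=1)  # (batch, 5)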
Example 16
def label_img_with_ques_etm():
    opt = config.parse_opt()
    q_i_a_path = os.path.join(root_path, "data/train/All_QA_Pairs_train.txt")

    img_ques_dict = {}
    with open(q_i_a_path, "r") as f:
        for row in f:
            q_i_a = row.strip().split("|")
            img = q_i_a[0]
            ques = q_i_a[1]
            if img in img_ques_dict:
                img_ques_dict[img].append(ques)
            else:
                img_ques_dict[img] = [ques]

    img_topic_dict = {}
    for img, qs in img_ques_dict.items():
        img_topic_vector = np.zeros(opt.ETM_TOP_NUM)
        for q in qs:
            words = VQADataProvider.text_to_list(q)
            q_t_v = etm_topic_distrib(words)
            img_topic_vector = np.add(img_topic_vector, q_t_v)
        img_topic_dict[img] = (np.argmax(img_topic_vector)).item()
    return img_topic_dict
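etm_topic_distrib is not defined in this snippet; from its use it apparently maps a token list to a vector of ETM_TOP_NUM topic weights. A hypothetical stand-in, good only for exercising the loop above:

def etm_topic_distrib(words):
    # hypothetical stub: uniform weight over all topics
    opt = config.parse_opt()
    return np.full(opt.ETM_TOP_NUM, 1.0 / opt.ETM_TOP_NUM)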
Example 17
            word_len = min(len(tokens), self.opt.max_word_len)
            for _ in range(self.opt.max_word_len - word_len):
                tokens.append(0)
            sent_chars.append(tokens[:self.opt.max_word_len])

        for _ in range(sen_len, self.opt.max_len):
            sent_ids.append(0)
            sent_pos_ids.append(0)
            sent_chars.append([0] * self.opt.max_word_len)
        return sent_words[:sen_len], sent_ids, sent_pos_ids, sent_chars, sen_len


opt = config.parse_opt()
Prepare = DataPrepare(opt)
Prepare.prepare()
Example 18
            img = images[ann['image_id']]
            x1, y1, w, h = ann['box']
            image = Image.open(os.path.join(image_root,
                                            img['file_name'])).convert('RGB')
            if h <= w:
                nh, nw = int(224 / w * h), 224
            else:
                nh, nw = 224, int(224 / h * w)
            image = image.crop((x1, y1, x1 + w, y1 + h)).resize(
                (nw, nh), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
            image = np.array(image).astype(np.float32)[:, :, ::-1]
            image -= np.array([103.939, 116.779, 123.68], dtype=np.float32)
            image = image.transpose((2, 0, 1))
            pad_image = np.zeros((3, 224, 224), dtype=np.float32)
            if nh <= nw:
                pad_image[:, (224 - nh) // 2:(224 - nh) // 2 + nh, :] = image
            else:
                pad_image[:, :, (224 - nw) // 2:(224 - nw) // 2 + nw] = image
            batch.append(pad_image)
        batch = Variable(xp.array(batch, dtype=xp.float32))
        feature = res(batch, layers=['pool5'])
        feature = cuda.to_cpu(feature['pool5'].data)
        ann_feats.extend(feature)
    np.save(os.path.join(target_save_dir, params['ann_feats']), ann_feats)


if __name__ == '__main__':

    args = config.parse_opt()
    params = vars(args)  # convert to ordinary dict
    extract_feature(params)
Example 19
 def __init__(self, word2idx=None, idx2word=None):
     self.opt = config.parse_opt()
     self.word2idx = word2idx
     self.idx2word = idx2word
Example 20
    ])

    valid_transform = transforms.Compose([
        transforms.CenterCrop(args.crop_size),
        # no RandomHorizontalFlip here: evaluation should be deterministic
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    train_dataloader = get_loader(opt, mode='train', transform=train_transform)
    valid_dataloader = get_loader(opt, mode='val', transform=valid_transform)

    print('load the dataset into memory...')
    print(
        'total iterations in training phase : {} \ntotal iterations in validation phase : {}'
        .format(len(train_dataloader), len(valid_dataloader)))

    trainer = Trainer(opt, train_dataloader, valid_dataloader)
    trainer.train()
    print('done')


if __name__ == "__main__":
    args = parse_opt()

    setup_logging('log.txt')  # single-argument os.path.join was a no-op
    logging.info("\nrun arguments: %s",
                 json.dumps(vars(args), indent=4, sort_keys=True))

    main(args)
    print('done')