Example #1
    def load_data(self):
        if self.signature == 'youtube2text':
            print 'loading youtube2text %s features' % self.video_feature
            dataset_path = common.get_rab_dataset_base_path(
            ) + 'youtube2text_iccv15/'
            # dataset_path = common.get_rab_dataset_base_path()
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.FEAT = common.load_pkl(dataset_path +
                                        'FEAT_key_vidID_value_features.pkl')

            self.train_ids = ['vid%s' % i for i in range(1, 1201)]
            self.valid_ids = ['vid%s' % i for i in range(1201, 1301)]
            self.test_ids = ['vid%s' % i for i in range(1301, 1971)]
        elif self.signature == 'lsmdc':
            print 'loading lsmdc %s features' % self.video_feature
            # dataset_path = common.get_rab_dataset_base_path()+'youtube2text_iccv15/'
            dataset_path = common.get_rab_dataset_base_path() + 'lsmdc/'
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.FEAT = common.load_pkl(dataset_path +
                                        'FEAT_key_vidID_value_features.pkl')
            # self.train_ids = ['vid%s'%i for i in range(1,100)]
            # self.valid_ids = ['vid%s'%i for i in range(101,200)]
            # self.test_ids = ['vid%s'%i for i in range(201,300)]
            self.train_ids = self.train
            self.valid_ids = self.valid
            self.test_ids = self.test
        else:
            raise NotImplementedError()

        self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')
        self.word_idict = dict()
        # worddict starts at index 2; indices 0 and 1 are reserved for <eos> and UNK
        for kk, vv in self.worddict.iteritems():
            self.word_idict[vv] = kk
        self.word_idict[0] = '<eos>'
        self.word_idict[1] = 'UNK'

        if self.video_feature == 'googlenet':
            self.ctx_dim = 1024
        else:
            raise NotImplementedError()
        self.kf_train = common.generate_minibatch_idx(len(self.train),
                                                      self.mb_size_train)
        self.kf_valid = common.generate_minibatch_idx(len(self.valid),
                                                      self.mb_size_test)
        self.kf_test = common.generate_minibatch_idx(len(self.test),
                                                     self.mb_size_test)
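
All of these snippets hand batching off to common.generate_minibatch_idx, whose source is not shown on this page. The following is a minimal sketch, assuming the helper simply chops the index range [0, n) into consecutive chunks; the real common module may differ:

def generate_minibatch_idx(n, minibatch_size):
    # chop [0, n) into consecutive index chunks; the last chunk is
    # shorter when n is not a multiple of minibatch_size
    indices = list(range(n))
    return [indices[i:i + minibatch_size]
            for i in range(0, n, minibatch_size)]

# e.g. generate_minibatch_idx(5, 2) -> [[0, 1], [2, 3], [4]]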
Example #2
    def load_data(self):
        if self.signature == 'youtube2text':
            print 'loading youtube2text %s features' % self.video_feature
            dataset_path = common.get_rab_dataset_base_path() + 'youtube2text_iccv15/'
            # dataset_path = common.get_rab_dataset_base_path()
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.FEAT = common.load_pkl(dataset_path + 'FEAT_key_vidID_value_features.pkl')

            self.train_ids = ['vid%s' % i for i in range(1, 1201)]
            self.valid_ids = ['vid%s' % i for i in range(1201, 1301)]
            self.test_ids = ['vid%s' % i for i in range(1301, 1971)]
        elif self.signature == 'lsmdc':
            print 'loading lsmdc %s features' % self.video_feature
            # dataset_path = common.get_rab_dataset_base_path()+'youtube2text_iccv15/'
            dataset_path = common.get_rab_dataset_base_path() + 'lsmdc/'
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.FEAT = common.load_pkl(dataset_path + 'FEAT_key_vidID_value_features.pkl')
            # self.train_ids = ['vid%s'%i for i in range(1,100)]
            # self.valid_ids = ['vid%s'%i for i in range(101,200)]
            # self.test_ids = ['vid%s'%i for i in range(201,300)]
            self.train_ids = self.train
            self.valid_ids = self.valid
            self.test_ids = self.test
        else:
            raise NotImplementedError()
                
        self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')
        self.word_idict = dict()
        # worddict starts at index 2; indices 0 and 1 are reserved for <eos> and UNK
        for kk, vv in self.worddict.iteritems():
            self.word_idict[vv] = kk
        self.word_idict[0] = '<eos>'
        self.word_idict[1] = 'UNK'
        
        if self.video_feature == 'googlenet':
            self.ctx_dim = 1024
        else:
            raise NotImplementedError()
        self.kf_train = common.generate_minibatch_idx(
            len(self.train), self.mb_size_train)
        self.kf_valid = common.generate_minibatch_idx(
            len(self.valid), self.mb_size_test)
        self.kf_test = common.generate_minibatch_idx(
            len(self.test), self.mb_size_test)
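
Every variant builds word_idict as the inverse of worddict so that model output (a sequence of word indices) can be mapped back to text. A hedged sketch of how that inverse map would be used at decode time; the helper name is hypothetical and not part of these snippets:

def indices_to_caption(word_idict, seq):
    # map indices back to tokens, stopping at the reserved <eos> index 0;
    # unknown indices fall back to the reserved UNK token (index 1)
    words = []
    for idx in seq:
        if idx == 0:
            break
        words.append(word_idict.get(idx, 'UNK'))
    return ' '.join(words)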
Example #3
    def load_data(self):
        if self.signature == 'youtube2text':
            print 'loading youtube2text %s features' % self.video_feature
            dataset_path = common.get_rab_dataset_base_path() + 'msvd_data/'
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.train_ids = ['vid%s' % i for i in range(1, 1201)]
            self.valid_ids = ['vid%s' % i for i in range(1201, 1301)]
            self.test_ids = ['vid%s' % i for i in range(1301, 1971)]
        else:
            raise NotImplementedError()
                
        self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')
        self.word_idict = dict()
        # worddict starts at index 2; indices 0 and 1 are reserved for <eos> and UNK
        for kk, vv in self.worddict.iteritems():
            self.word_idict[vv] = kk
        self.word_idict[0] = '<eos>'
        self.word_idict[1] = 'UNK'
        
        if self.video_feature == 'googlenet':
            self.ctxg_dim = 1024    # global feature dimension
            self.ctxl_dim = 4096    # local feature dimension
            self.ctxglm_dim = 1024  # fused feature dimension
            self.ctxm_dim = 4096    # motion feature dimension
        else:
            raise NotImplementedError()
        self.kf_train = common.generate_minibatch_idx(
            len(self.train), self.mb_size_train)
        self.kf_valid = common.generate_minibatch_idx(
            len(self.valid), self.mb_size_test)
        self.kf_test = common.generate_minibatch_idx(
            len(self.test), self.mb_size_test)
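
Example #3 tracks separate global (1024-d), local (4096-d), and motion (4096-d) feature dimensions plus a fused 1024-d one, but the fusion itself is not shown. One combination consistent with those shapes is to project the local feature down and add it to the global feature; this is an illustrative assumption, not the snippet's actual method:

import numpy as np

def fuse_global_local(global_feat, local_feat, proj):
    # global_feat: (1024,), local_feat: (4096,), proj: (4096, 1024);
    # the output matches ctxglm_dim = 1024 from the snippet above
    return global_feat + np.dot(local_feat, proj)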
Example #4
    def load_data(self):
        if self.signature == 'youtube2text':
            print 'loading youtube2text %s features' % self.video_feature
            dataset_path = common.get_rab_dataset_base_path() + 'data/'
            self.train = common.load_pkl(dataset_path + 'train.pkl')
            self.valid = common.load_pkl(dataset_path + 'valid.pkl')
            self.test = common.load_pkl(dataset_path + 'test.pkl')
            self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
            self.s_o = common.load_pkl(
                dataset_path + 'sem_obj_new.pkl')  # load extracted visual tags
            self.train_ids = ['vid%s' % i for i in range(1, 1201)]
            self.valid_ids = ['vid%s' % i for i in range(1201, 1301)]
            self.test_ids = ['vid%s' % i for i in range(1301, 1971)]
        else:
            raise NotImplementedError()

        self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')

        # add the 3000 Visual Genome object words to our vocabulary
        with open(dataset_path + 'vg_list', 'r') as f:
            # one object name per line; the loop stops at EOF or at the first
            # blank line, since stripping the newline leaves an empty string
            semantic_object = []
            line = f.readline()[:-1]
            while line:
                semantic_object.append(line)
                line = f.readline()[:-1]

        # split multi-word object names on '_' and collect the unique tokens
        new_s_os = []
        for o in semantic_object:
            new_s_o = o.split('_')
            for i in new_s_o:
                if i not in new_s_os:
                    new_s_os.append(i)
        new_s_os_l = len(new_s_os)

        # assign the next free index to each token not already in the vocabulary
        for oo in new_s_os:
            if oo not in self.worddict:
                self.worddict[oo] = len(self.worddict)
        self.word_idict = dict()
        # worddict starts at index 2; indices 0 and 1 are reserved for <eos> and UNK
        for kk, vv in self.worddict.iteritems():
            self.word_idict[vv] = kk
        self.word_idict[0] = '<eos>'
        self.word_idict[1] = 'UNK'

        if self.video_feature == 'resnet152':
            self.ctx_dim = 4096
        else:
            raise NotImplementedError()
        self.kf_train = common.generate_minibatch_idx(len(self.train),
                                                      self.mb_size_train)
        self.kf_valid = common.generate_minibatch_idx(len(self.valid),
                                                      self.mb_size_test)
        self.kf_test = common.generate_minibatch_idx(len(self.test),
                                                     self.mb_size_test)
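
Example #4 grows the vocabulary with Visual Genome object words by appending each unseen token at index len(worddict). The same idea in isolation, as a standalone sketch; like the snippet, it assumes the dictionary's size already accounts for the two reserved indices 0 (<eos>) and 1 (UNK):

def extend_worddict(worddict, tokens):
    # give each unseen token the next free index; existing entries keep theirs
    for tok in tokens:
        if tok not in worddict:
            worddict[tok] = len(worddict)
    return worddict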
Example #5
    def load_data(self):
        if self.signature == 'youtube2text' or self.signature == 'trecvid':
            print 'loading {} {} features'.format(self.signature, self.video_feature)
            if self.data_dir == '':
                dataset_path = common.get_rab_dataset_base_path() + 'youtube2text/' + self.video_feature
            else:
                dataset_path = self.data_dir

            # dataset_path = common.get_rab_dataset_base_path()
            self.train = common.load_pkl(os.path.join(dataset_path, 'train.pkl'))
            self.valid = common.load_pkl(os.path.join(dataset_path, 'valid.pkl'))
            self.test = common.load_pkl(os.path.join(dataset_path, 'test.pkl'))
            self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl'))


            # self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEAT_key_vidID_value_features_'+self.proc+'.pkl'))
            self.load_feats(dataset_path)

            # keep only the videoID part of each 'videoID_capID' key; set() drops duplicates
            self.train_ids = list(set(vid.split('_')[0] for vid in self.train))
            self.valid_ids = list(set(vid.split('_')[0] for vid in self.valid))
            self.test_ids = list(set(vid.split('_')[0] for vid in self.test))


        elif self.signature in ('lsmdc', 'lsmdc16', 'mvad', 'mpii', 'tacos'):
            print 'loading {} {} features'.format(self.signature, self.video_feature)
            dataset_path = self.data_dir
            self.train = common.load_pkl(os.path.join(dataset_path, 'train.pkl'))
            self.valid = common.load_pkl(os.path.join(dataset_path, 'valid.pkl'))
            self.test = common.load_pkl(os.path.join(dataset_path, 'test.pkl'))
            self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl'))

            self.train_ids = self.train
            self.valid_ids = self.valid
            self.test_ids = self.test

            if self.signature == 'lsmdc16':
                self.btest = common.load_pkl(os.path.join(dataset_path, 'blindtest.pkl'))
                self.btest_ids = self.btest


        elif self.signature == 'ysvd':
            print 'loading ysvd %s features' % self.video_feature
            dataset_path = common.get_rab_dataset_base_path() + 'ysvd/'

            self.all = common.load_pkl(os.path.join(dataset_path, 'all_vids.pkl'))
            self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl'))
            self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEAT_key_vidID_value_features.pkl'))

            # note: the elements at indices 500 and 750 are skipped by these slices
            self.train = self.all[0:500]
            self.valid = self.all[501:750]
            self.test = self.all[751:1000]

            self.train_ids = self.train
            self.valid_ids = self.valid
            self.test_ids = self.test

        elif self.signature == 'vtt16' or self.signature == 'vtt17':
            print 'loading {} {} features'.format(self.signature, self.video_feature)

            if self.data_dir == '':
                dataset_path = common.get_rab_dataset_base_path() + 'vtt/' + self.video_feature
            else:
                dataset_path = self.data_dir

            self.train = common.load_pkl(os.path.join(dataset_path, 'train.pkl'))
            self.valid = common.load_pkl(os.path.join(dataset_path, 'valid.pkl'))
            self.test = common.load_pkl(os.path.join(dataset_path, 'test.pkl'))
            self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl'))


            self.load_feats(dataset_path)

            # Get a list of just the videoID, instead of videoID_CapID. Use set() to drop duplicates, then recast to list
            self.train_ids = list(set(vid.split('_')[0] for vid in self.train))
            self.valid_ids = list(set(vid.split('_')[0] for vid in self.valid))
            self.test_ids = list(set(vid.split('_')[0] for vid in self.test))


        else:
            raise NotImplementedError()
                
        self.worddict = common.load_pkl(os.path.join(dataset_path, 'worddict.pkl'))
        self.word_idict = dict()
        # worddict starts at index 2; indices 0 and 1 are reserved for <eos> and UNK
        for kk, vv in self.worddict.iteritems():
            self.word_idict[vv] = kk
        self.word_idict[0] = '<eos>'
        self.word_idict[1] = 'UNK'

        if self.video_feature == 'googlenet':
            self.ctx_dim = 1024
        elif self.video_feature == 'resnet' or self.video_feature == 'resnet152':
            if self.proc == 'nostd':
                self.ctx_dim = 2048
            elif self.proc == 'pca':
                self.ctx_dim = 1024
        elif self.video_feature == 'nasnetalarge':
            self.ctx_dim = 4032
        elif self.video_feature == 'pnasnet5large':
            self.ctx_dim = 4320
        elif self.video_feature == 'polynet':
            self.ctx_dim = 2048
        elif self.video_feature == 'senet154':
            self.ctx_dim = 2048
        elif self.video_feature == 'densenet121':
            raise NotImplementedError()
        elif self.video_feature == 'c3d':
            if self.proc == 'nostd':
                self.ctx_dim = 4101
            elif self.proc == 'pca':
                self.ctx_dim = 1024
        elif self.video_feature == 'c3d_resnet':
            if self.proc == 'nostd':
                self.ctx_dim = 6149
            elif self.proc == 'pca':
                self.ctx_dim = 2048
            elif self.proc == 'pca512':
                self.ctx_dim = 1024
            elif self.proc == 'pca_c3d':
                self.ctx_dim = 3072
        else:
            raise NotImplementedError()

        print "ctx_dim: "+str(self.ctx_dim)
        self.kf_train = common.generate_minibatch_idx(
            len(self.train), self.mb_size_train)
        self.kf_valid = common.generate_minibatch_idx(
            len(self.valid), self.mb_size_test)
        self.kf_test = common.generate_minibatch_idx(
            len(self.test), self.mb_size_test)

        if self.dec == 'multi-stdist':
            self.skip_vectors = common.load_pkl(os.path.join(dataset_path, 'skip_vectors.pkl'))
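
The long elif chain in Example #5 maps a (video_feature, proc) pair to a feature dimension. The same values can be written as a lookup table, which makes the supported combinations easier to audit; this restates the dimensions above rather than adding new ones (None marks features whose dimension does not depend on proc):

# (video_feature, proc) -> ctx_dim
CTX_DIMS = {
    ('googlenet', None): 1024,
    ('resnet', 'nostd'): 2048, ('resnet', 'pca'): 1024,
    ('resnet152', 'nostd'): 2048, ('resnet152', 'pca'): 1024,
    ('nasnetalarge', None): 4032,
    ('pnasnet5large', None): 4320,
    ('polynet', None): 2048,
    ('senet154', None): 2048,
    ('c3d', 'nostd'): 4101, ('c3d', 'pca'): 1024,
    ('c3d_resnet', 'nostd'): 6149, ('c3d_resnet', 'pca'): 2048,
    ('c3d_resnet', 'pca512'): 1024, ('c3d_resnet', 'pca_c3d'): 3072,
}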