def load_data(self):
    """Load splits, captions, features and vocabulary for ``self.signature``.

    Handles the 'youtube2text' and 'lsmdc' signatures. Any other signature,
    or a ``self.video_feature`` other than 'googlenet', raises
    NotImplementedError.

    Sets on ``self``: train, valid, test, CAP, FEAT, train_ids, valid_ids,
    test_ids, worddict, word_idict, ctx_dim, kf_train, kf_valid, kf_test.
    """
    signature = self.signature
    if signature not in ('youtube2text', 'lsmdc'):
        raise NotImplementedError()

    is_youtube = signature == 'youtube2text'
    # print(<single expression>) produces the same output as the Python 2
    # print statement.
    if is_youtube:
        print('loading youtube2text %s features' % self.video_feature)
        dataset_path = (common.get_rab_dataset_base_path()
                        + 'youtube2text_iccv15/')
    else:
        print('loading lsmdc %s features' % self.video_feature)
        dataset_path = common.get_rab_dataset_base_path() + 'lsmdc/'

    # Both datasets ship the same five pickles; load them in a fixed order.
    pickles = (
        ('train', 'train.pkl'),
        ('valid', 'valid.pkl'),
        ('test', 'test.pkl'),
        ('CAP', 'CAP.pkl'),
        ('FEAT', 'FEAT_key_vidID_value_features.pkl'),
    )
    for attr_name, file_name in pickles:
        setattr(self, attr_name, common.load_pkl(dataset_path + file_name))

    if is_youtube:
        # Fixed id ranges: vid1..vid1200, vid1201..vid1300, vid1301..vid1970.
        self.train_ids = ['vid%s' % n for n in range(1, 1201)]
        self.valid_ids = ['vid%s' % n for n in range(1201, 1301)]
        self.test_ids = ['vid%s' % n for n in range(1301, 1971)]
    else:
        # For lsmdc the id lists are the split lists themselves.
        self.train_ids = self.train
        self.valid_ids = self.valid
        self.test_ids = self.test

    self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')
    # Inverse vocabulary (index -> word). worddict starts at index 2, so
    # slots 0 and 1 are filled with the special tokens afterwards.
    self.word_idict = {index: word for word, index in self.worddict.items()}
    self.word_idict[0] = '<eos>'
    self.word_idict[1] = 'UNK'

    if self.video_feature != 'googlenet':
        raise NotImplementedError()
    self.ctx_dim = 1024

    # Note: the validation split is batched with mb_size_test.
    self.kf_train = common.generate_minibatch_idx(
        len(self.train), self.mb_size_train)
    self.kf_valid = common.generate_minibatch_idx(
        len(self.valid), self.mb_size_test)
    self.kf_test = common.generate_minibatch_idx(
        len(self.test), self.mb_size_test)
def load_data(self):
    """Populate this object with the dataset selected by ``self.signature``.

    'youtube2text' is read from '<base>/youtube2text_iccv15/' and 'lsmdc'
    from '<base>/lsmdc/', where <base> comes from
    ``common.get_rab_dataset_base_path()``. Every other signature raises
    NotImplementedError, and so does any ``self.video_feature`` other than
    'googlenet'.

    Attributes written: train, valid, test, CAP, FEAT, train_ids,
    valid_ids, test_ids, worddict, word_idict, ctx_dim, kf_train,
    kf_valid, kf_test.
    """
    def read_pickle(file_name):
        # dataset_path is bound below, before the first call.
        return common.load_pkl(dataset_path + file_name)

    def read_common_pickles():
        self.train = read_pickle('train.pkl')
        self.valid = read_pickle('valid.pkl')
        self.test = read_pickle('test.pkl')
        self.CAP = read_pickle('CAP.pkl')
        self.FEAT = read_pickle('FEAT_key_vidID_value_features.pkl')

    def video_names(first, stop):
        return ['vid%s' % number for number in range(first, stop)]

    if self.signature == 'youtube2text':
        print('loading youtube2text %s features' % self.video_feature)
        base_dir = common.get_rab_dataset_base_path()
        dataset_path = base_dir + 'youtube2text_iccv15/'
        read_common_pickles()
        # Hard-coded split boundaries for the video ids.
        self.train_ids = video_names(1, 1201)
        self.valid_ids = video_names(1201, 1301)
        self.test_ids = video_names(1301, 1971)
    elif self.signature == 'lsmdc':
        print('loading lsmdc %s features' % self.video_feature)
        base_dir = common.get_rab_dataset_base_path()
        dataset_path = base_dir + 'lsmdc/'
        read_common_pickles()
        # The split lists double as the id lists for lsmdc.
        self.train_ids, self.valid_ids, self.test_ids = (
            self.train, self.valid, self.test)
    else:
        raise NotImplementedError()

    self.worddict = read_pickle('worddict.pkl')

    # Build index -> word. worddict starts with index 2, leaving 0 and 1
    # for the end-of-sentence and unknown-word markers set below.
    index_to_word = dict()
    for word, index in self.worddict.items():
        index_to_word[index] = word
    index_to_word[0] = '<eos>'
    index_to_word[1] = 'UNK'
    self.word_idict = index_to_word

    if self.video_feature == 'googlenet':
        self.ctx_dim = 1024
    else:
        raise NotImplementedError()

    # The validation split uses the test batch size (mb_size_test).
    split_sizes = (
        ('kf_train', self.train, self.mb_size_train),
        ('kf_valid', self.valid, self.mb_size_test),
        ('kf_test', self.test, self.mb_size_test),
    )
    for attr_name, split, batch_size in split_sizes:
        setattr(self, attr_name,
                common.generate_minibatch_idx(len(split), batch_size))
def load_data(self):
    """Load the youtube2text splits, captions and vocabulary from 'msvd_data/'.

    Only ``self.signature == 'youtube2text'`` and
    ``self.video_feature == 'googlenet'`` are supported. Anything else
    raises NotImplementedError.

    Sets on ``self``: train, valid, test, CAP, train_ids, valid_ids,
    test_ids, worddict, word_idict, ctxg_dim, ctxl_dim, ctxglm_dim,
    ctxm_dim, kf_train, kf_valid, kf_test.
    """
    if self.signature != 'youtube2text':
        raise NotImplementedError()

    # print(<single expression>) matches the Python 2 print statement output.
    print('loading youtube2text %s features' % self.video_feature)
    dataset_path = common.get_rab_dataset_base_path() + 'msvd_data/'

    for attr_name, file_name in (('train', 'train.pkl'),
                                 ('valid', 'valid.pkl'),
                                 ('test', 'test.pkl'),
                                 ('CAP', 'CAP.pkl')):
        setattr(self, attr_name, common.load_pkl(dataset_path + file_name))

    # Fixed id ranges: vid1..vid1200, vid1201..vid1300, vid1301..vid1970.
    self.train_ids = ['vid%s' % n for n in range(1, 1201)]
    self.valid_ids = ['vid%s' % n for n in range(1201, 1301)]
    self.test_ids = ['vid%s' % n for n in range(1301, 1971)]

    self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')
    # Inverse vocabulary. worddict starts at index 2, so 0 and 1 are
    # assigned to the special tokens after the inversion.
    self.word_idict = {index: word for word, index in self.worddict.items()}
    self.word_idict[0] = '<eos>'
    self.word_idict[1] = 'UNK'

    if self.video_feature != 'googlenet':
        raise NotImplementedError()
    self.ctxg_dim = 1024    # global_feature dimension
    self.ctxl_dim = 4096    # local_feature dimension
    self.ctxglm_dim = 1024  # fused dimension
    self.ctxm_dim = 4096    # motion_feature dimension

    # The validation split is batched with mb_size_test.
    self.kf_train = common.generate_minibatch_idx(
        len(self.train), self.mb_size_train)
    self.kf_valid = common.generate_minibatch_idx(
        len(self.valid), self.mb_size_test)
    self.kf_test = common.generate_minibatch_idx(
        len(self.test), self.mb_size_test)
def load_data(self):
    """Load youtube2text data and extend the vocabulary with 'vg_list' words.

    Only ``self.signature == 'youtube2text'`` and
    ``self.video_feature == 'resnet152'`` are supported. Anything else
    raises NotImplementedError.

    Reads train/valid/test/CAP/sem_obj_new/worddict pickles and the text
    file 'vg_list' from '<base>/data/', where <base> comes from
    ``common.get_rab_dataset_base_path()``. Each non-empty line of
    'vg_list' is split on '_' and every resulting token that is not yet in
    ``self.worddict`` is appended to it with a fresh index.

    Sets on ``self``: train, valid, test, CAP, s_o, train_ids, valid_ids,
    test_ids, worddict, word_idict, ctx_dim, kf_train, kf_valid, kf_test.
    """
    if self.signature != 'youtube2text':
        raise NotImplementedError()

    print('loading youtube2text %s features' % self.video_feature)
    dataset_path = common.get_rab_dataset_base_path() + 'data/'
    self.train = common.load_pkl(dataset_path + 'train.pkl')
    self.valid = common.load_pkl(dataset_path + 'valid.pkl')
    self.test = common.load_pkl(dataset_path + 'test.pkl')
    self.CAP = common.load_pkl(dataset_path + 'CAP.pkl')
    # Extracted visual tags.
    self.s_o = common.load_pkl(dataset_path + 'sem_obj_new.pkl')
    self.train_ids = ['vid%s' % i for i in range(1, 1201)]
    self.valid_ids = ['vid%s' % i for i in range(1201, 1301)]
    self.test_ids = ['vid%s' % i for i in range(1301, 1971)]

    self.worddict = common.load_pkl(dataset_path + 'worddict.pkl')

    # Add the words listed in vg_list (the visual genome words, per the
    # original comment) to the vocabulary. Only the line terminator is
    # stripped, so a final line without a trailing newline keeps its last
    # character, and blank lines are skipped instead of ending the read.
    with open(dataset_path + 'vg_list', 'r') as vg_file:
        stripped_lines = (raw_line.rstrip('\r\n') for raw_line in vg_file)
        semantic_objects = [entry for entry in stripped_lines if entry]

    # worddict starts with index 2 (0 and 1 are reserved for '<eos>' and
    # 'UNK'), so len(worddict) can equal an index that is already taken.
    # Continue after the largest existing index so new words never
    # overwrite an existing entry in the inverse dictionary built below.
    if self.worddict:
        next_index = max(2, max(self.worddict.values()) + 1)
    else:
        next_index = 2
    for entry in semantic_objects:
        for token in entry.split('_'):
            if token in self.worddict:
                continue
            self.worddict[token] = next_index
            next_index += 1

    # Inverse vocabulary (index -> word) plus the two special tokens.
    self.word_idict = dict()
    for word, index in self.worddict.items():
        self.word_idict[index] = word
    self.word_idict[0] = '<eos>'
    self.word_idict[1] = 'UNK'

    if self.video_feature == 'resnet152':
        self.ctx_dim = 4096
    else:
        raise NotImplementedError()

    # The validation split is batched with mb_size_test.
    self.kf_train = common.generate_minibatch_idx(
        len(self.train), self.mb_size_train)
    self.kf_valid = common.generate_minibatch_idx(
        len(self.valid), self.mb_size_test)
    self.kf_test = common.generate_minibatch_idx(
        len(self.test), self.mb_size_test)
def load_data(self):
    """Load splits, captions, vocabulary and feature size for ``self.signature``.

    Supported signatures:

    * 'youtube2text', 'trecvid': read from ``self.data_dir``, or from
      '<base>/youtube2text/<video_feature>' when ``data_dir`` is ''.
      Features are loaded through ``self.load_feats``. The id lists are
      the unique prefixes (text before the first '_') of the split entries.
    * 'lsmdc', 'lsmdc16', 'mvad', 'mpii', 'tacos': read from
      ``self.data_dir``. The id lists are the split lists themselves.
      'lsmdc16' additionally loads 'blindtest.pkl' into btest / btest_ids.
    * 'ysvd': read from '<base>/ysvd/'. 'all_vids.pkl' is cut into
      train [0:500], valid [500:750] and test [750:1000].
    * 'vtt16', 'vtt17': like 'youtube2text', with the default directory
      '<base>/vtt/<video_feature>'.

    <base> is ``common.get_rab_dataset_base_path()``.

    Also sets worddict, word_idict, ctx_dim (chosen from
    ``self.video_feature`` and, for some features, ``self.proc``),
    kf_train / kf_valid / kf_test, and skip_vectors when
    ``self.dec == 'multi-stdist'``.

    Raises:
        NotImplementedError: for an unknown signature, an unknown or
            unsupported video feature, or a ``proc`` value that selects
            no ctx_dim while none has been set on the instance before.
    """
    def load(file_name):
        # dataset_path is bound in each branch before the first call.
        return common.load_pkl(os.path.join(dataset_path, file_name))

    def load_splits_and_captions():
        self.train = load('train.pkl')
        self.valid = load('valid.pkl')
        self.test = load('test.pkl')
        self.CAP = load('CAP.pkl')

    def unique_video_ids(split):
        # Entries look like videoID_CapID: keep the videoID part and use
        # a set to drop duplicates, then recast to a list.
        return list(set(entry.split('_')[0] for entry in split))

    signature = self.signature
    if signature in ('youtube2text', 'trecvid'):
        print('loading {} {} features'.format(signature, self.video_feature))
        if self.data_dir == '':
            dataset_path = (common.get_rab_dataset_base_path()
                            + 'youtube2text/' + self.video_feature)
        else:
            dataset_path = self.data_dir
        load_splits_and_captions()
        self.load_feats(dataset_path)
        self.train_ids = unique_video_ids(self.train)
        self.valid_ids = unique_video_ids(self.valid)
        self.test_ids = unique_video_ids(self.test)
    elif signature in ('lsmdc', 'lsmdc16', 'mvad', 'mpii', 'tacos'):
        print('loading {} {} features'.format(signature, self.video_feature))
        dataset_path = self.data_dir
        load_splits_and_captions()
        self.train_ids = self.train
        self.valid_ids = self.valid
        self.test_ids = self.test
        if signature == 'lsmdc16':
            self.btest = load('blindtest.pkl')
            self.btest_ids = self.btest
    elif signature == 'ysvd':
        print('loading ysvd %s features' % self.video_feature)
        dataset_path = common.get_rab_dataset_base_path() + 'ysvd/'
        self.all = load('all_vids.pkl')
        self.CAP = load('CAP.pkl')
        self.FEAT = load('FEAT_key_vidID_value_features.pkl')
        # Slices are half-open, so each split starts where the previous
        # one ended. Starting at 501 / 751 would drop items 500 and 750.
        self.train = self.all[0:500]
        self.valid = self.all[500:750]
        self.test = self.all[750:1000]
        self.train_ids = self.train
        self.valid_ids = self.valid
        self.test_ids = self.test
    elif signature in ('vtt16', 'vtt17'):
        print('loading {} {} features'.format(signature, self.video_feature))
        if self.data_dir == '':
            dataset_path = (common.get_rab_dataset_base_path()
                            + 'vtt/' + self.video_feature)
        else:
            dataset_path = self.data_dir
        load_splits_and_captions()
        self.load_feats(dataset_path)
        self.train_ids = unique_video_ids(self.train)
        self.valid_ids = unique_video_ids(self.valid)
        self.test_ids = unique_video_ids(self.test)
    else:
        raise NotImplementedError()

    self.worddict = load('worddict.pkl')
    # Inverse vocabulary (index -> word). worddict starts with index 2,
    # so 0 and 1 are assigned to the special tokens afterwards.
    self.word_idict = dict()
    for word, index in self.worddict.items():
        self.word_idict[index] = word
    self.word_idict[0] = '<eos>'
    self.word_idict[1] = 'UNK'

    # Feature size per video feature. A nested dict means the size also
    # depends on self.proc.
    resnet_dims = {'nostd': 2048, 'pca': 1024}
    ctx_dims = {
        'googlenet': 1024,
        'resnet': resnet_dims,
        'resnet152': resnet_dims,
        'nasnetalarge': 4032,
        'pnasnet5large': 4320,
        'polynet': 2048,
        'senet154': 2048,
        'c3d': {'nostd': 4101, 'pca': 1024},
        'c3d_resnet': {'nostd': 6149, 'pca': 2048,
                       'pca512': 1024, 'pca_c3d': 3072},
    }
    # 'densenet121' is deliberately absent: it is recognised but not
    # supported, and raises like any unknown feature.
    if self.video_feature not in ctx_dims:
        raise NotImplementedError()
    dim_entry = ctx_dims[self.video_feature]
    if isinstance(dim_entry, dict):
        # self.proc is only consulted for proc-dependent features.
        if self.proc in dim_entry:
            self.ctx_dim = dim_entry[self.proc]
    else:
        self.ctx_dim = dim_entry
    # An unsupported proc leaves ctx_dim untouched. Fail clearly instead
    # of hitting an AttributeError in the print below when it was never set.
    if not hasattr(self, 'ctx_dim'):
        raise NotImplementedError(
            'unsupported proc %r for video feature %r'
            % (self.proc, self.video_feature))
    print("ctx_dim: " + str(self.ctx_dim))

    # The validation split is batched with mb_size_test.
    self.kf_train = common.generate_minibatch_idx(
        len(self.train), self.mb_size_train)
    self.kf_valid = common.generate_minibatch_idx(
        len(self.valid), self.mb_size_test)
    self.kf_test = common.generate_minibatch_idx(
        len(self.test), self.mb_size_test)

    if self.dec == 'multi-stdist':
        self.skip_vectors = load('skip_vectors.pkl')