def generate_features(self, out_file, feat_extractor, transform_type):
    """Extract a feature vector for every image in the dataset and save them.

    Runs all train/val/test images through `feat_extractor` (a ResNet-18
    with its classifier head removed if none is given) and writes a dict
    {'features': FloatTensor [N, D], 'files': list of N image paths} to
    `out_file` via `torch.save`.

    Args:
        out_file: destination path for the saved feature archive.
        feat_extractor: feature network, or None to build a default
            ImageNet-pretrained ResNet-18 backbone (512-d output).
        transform_type: preprocessing variant passed to
            `data_utils.imagenet_transform` (always in 'test' mode here).
    """
    data = self.train_data + self.val_data + self.test_data
    transform = data_utils.imagenet_transform('test', transform_type)

    if feat_extractor is None:
        feat_extractor = torchvision.models.resnet18(pretrained=True)
        # Replace the classifier with an identity so the network emits
        # the 512-d pooled features instead of class logits.
        feat_extractor.fc = torch.nn.Sequential()
    feat_extractor.eval().cuda()

    image_feats = []
    image_files = []
    # Ceiling division: the old len(data)//512 undercounted the tqdm
    # total by one batch whenever len(data) was not a multiple of 512.
    num_batches = (len(data) + 511) // 512
    # Inference only — no_grad avoids building autograd graphs and
    # keeps GPU memory flat across batches.
    with torch.no_grad():
        for chunk in tqdm.tqdm(data_utils.chunks(data, 512),
                               total=num_batches):
            # Bug fix: zip(*chunk)[0] fails on Python 3 (zip returns an
            # iterator); take the first field of each sample directly.
            files = [sample[0] for sample in chunk]
            imgs = [transform(self.loader(f)) for f in files]
            feats = feat_extractor(torch.stack(imgs, 0).cuda())
            image_feats.append(feats.cpu())
            image_files += files

    image_feats = torch.cat(image_feats, 0)
    print('features for %d images generated' % (len(image_files)))
    torch.save({'features': image_feats, 'files': image_files}, out_file)
def __init__(self, root, phase, feat_file, split='compositional-split', with_image=False, obj_pred=None, transform_type='normal'):
    """Build the attribute-object composition dataset for one phase.

    Loads (optionally) precomputed image features, parses the
    attribute/object/pair splits, builds lookup indices, per-object
    affordance masks, and a negative-sample pool for training.

    Args:
        root: dataset root directory (images under root + '/images/').
        phase: one of 'train', 'val', 'test' — selects self.data.
        feat_file: name of a saved feature archive relative to root,
            or None to skip feature loading.
        split: split directory name used by parse_split.
        with_image: stored flag; presumably controls whether raw images
            are returned by __getitem__ — confirm against that method.
        obj_pred: optional pickle filename (under cfg.ROOT_DIR/data/
            obj_scores) with per-image object score arrays.
        transform_type: image preprocessing variant for
            data_utils.imagenet_transform.
    """
    self.root = root
    self.phase = phase
    self.split = split
    self.with_image = with_image
    self.feat_dim = None  # set below only if a feature file is loaded
    self.transform = data_utils.imagenet_transform(phase, transform_type)
    self.loader = data_utils.ImageLoader(self.root + '/images/')

    if feat_file is not None:
        feat_file = os.path.join(root, feat_file)
        activation_data = torch.load(feat_file)
        # Map image filename -> precomputed feature tensor.
        self.activation_dict = dict(
            zip(activation_data['files'], activation_data['features']))
        self.feat_dim = activation_data['features'].size(1)
        print('%d activations loaded' % (len(self.activation_dict)))

    # pair = (attr, obj)
    (self.attrs, self.objs, self.pairs, self.train_pairs, self.val_pairs,
     self.test_pairs) = self.parse_split()

    # Name -> integer-id lookup tables.
    self.attr2idx = {attr: idx for idx, attr in enumerate(self.attrs)}
    self.obj2idx = {obj: idx for idx, obj in enumerate(self.objs)}
    self.pair2idx = {pair: idx for idx, pair in enumerate(self.pairs)}

    self.train_data, self.val_data, self.test_data = self.get_split_info()
    if self.phase == 'train':
        self.data = self.train_data
    elif self.phase == 'val':
        self.data = self.val_data
    elif self.phase == 'test':
        self.data = self.test_data
    # list of [img_name, attr, obj, attr_id, obj_id, feat]

    print('#images = %d' % len(self.data))

    # fix later -- affordance thing
    # return {object: all attrs that occur with obj}
    # NOTE(review): masks are built from train+test data — test-set
    # attribute co-occurrence leaks in here; the "fix later" above
    # suggests this is known.
    self.obj_affordance_mask = []
    for _obj in self.objs:
        candidates = [
            x[1] for x in self.train_data + self.test_data if x[2] == _obj
        ]  # x = (_,attr,obj,_,_,_)
        affordance = set(candidates)
        mask = [1 if x in affordance else 0 for x in self.attrs]
        self.obj_affordance_mask.append(mask)

    # negative image pool: bucket training sample indices by object id.
    samples_grouped_by_obj = [[] for _ in range(len(self.objs))]
    for i, x in enumerate(self.train_data):
        samples_grouped_by_obj[x[4]].append(i)

    self.neg_pool = []  # [obj_id][attr_id] => list of sample id
    for obj_id in range(len(self.objs)):
        self.neg_pool.append([])
        for attr_id in range(len(self.attrs)):
            # Same object, different attribute -> valid negatives.
            self.neg_pool[obj_id].append([
                i for i in samples_grouped_by_obj[obj_id]
                if self.train_data[i][3] != attr_id
            ])

    # Gamma parameters ({'a','b'}) for composition/attribute scoring;
    # semantics defined by the consumer of these dicts — confirm there.
    self.comp_gamma = {'a': 1, 'b': 1}
    self.attr_gamma = {'a': 1, 'b': 1}

    if obj_pred is None:
        self.obj_pred = None
    else:
        obj_pred_path = osp.join(cfg.ROOT_DIR, 'data/obj_scores', obj_pred)
        print("Loading object prediction from %s" % obj_pred_path.split('/')[-1])
        with open(obj_pred_path, 'rb') as fp:
            self.obj_pred = np.array(pickle.load(fp), dtype=np.float32)