def preprocess_batch(batch):
    """Give a single, un-batched episode a leading axis of size one.

    When ``batch.x_train`` is 4-D, every array of the episode is expanded
    with ``np.expand_dims(..., 0)`` and a new ``Episode`` is built from the
    results. Unlabeled images / labels that are missing stay ``None``.
    For any other rank the input object is returned untouched.
    """
    if len(batch.x_train.shape) != 4:
        return batch

    def _lead_axis(array):
        # Insert a new axis 0 in front of the existing ones.
        return np.expand_dims(array, 0)

    images_train = _lead_axis(batch.x_train)
    targets_train = _lead_axis(batch.y_train)
    images_test = _lead_axis(batch.x_test)
    targets_test = _lead_axis(batch.y_test)

    raw_unlabel = batch.x_unlabel
    images_unlabel = None if raw_unlabel is None else _lead_axis(raw_unlabel)

    # ``y_unlabel`` is optional on the incoming object.
    raw_unlabel_targets = getattr(batch, 'y_unlabel', None)
    if raw_unlabel_targets is None:
        targets_unlabel = None
    else:
        targets_unlabel = _lead_axis(raw_unlabel_targets)

    return Episode(
        images_train,
        targets_train,
        images_test,
        targets_test,
        x_unlabel=images_unlabel,
        y_unlabel=targets_unlabel,
        y_train_str=batch.y_train_str,
        y_test_str=batch.y_test_str)
def _process_batch(self, batch, super_classes=False):
    """Convert the numpy arrays of an episode into CUDA Variables.

    Images (``x_train``, ``x_test``, ``x_unlabel``) become float tensors.
    Labels become integer tensors: with ``super_classes`` they are read
    from column 1 of ``y_train_str`` / ``y_test_str`` and given a leading
    axis; otherwise they are read from ``[:, :, 1]`` of ``y_train`` /
    ``y_test``. Unlabeled data is dropped (set to ``None``) when
    ``x_unlabel`` is ``None`` or empty. The index arrays are passed on as
    numpy arrays with a new leading axis.

    Every tensor is moved with ``.cuda()``, so a CUDA device is required.
    """

    def _frozen(tensor):
        # Wrap a tensor in a Variable that does not track gradients.
        return Variable(tensor, requires_grad=False)

    def _float_images(array):
        return _frozen(
            torch.from_numpy(array).type(torch.FloatTensor)).cuda()

    x_train = _float_images(batch.x_train)
    x_test = _float_images(batch.x_test)

    has_unlabel = batch.x_unlabel is not None and batch.x_unlabel.size > 0
    if has_unlabel:
        x_unlabel = _float_images(batch.x_unlabel)
        y_unlabel = _frozen(
            torch.from_numpy(batch.y_unlabel.astype(np.int64))).cuda()
    else:
        x_unlabel = None
        y_unlabel = None

    if super_classes:
        def _labels_from(source):
            column = torch.from_numpy(source[:, 1]).type(torch.LongTensor)
            return _frozen(column).unsqueeze(0).cuda()

        labels_train = _labels_from(batch.y_train_str)
        labels_test = _labels_from(batch.y_test_str)
    else:
        def _labels_from(source):
            return _frozen(
                torch.from_numpy(source.astype(np.int64)[:, :, 1])).cuda()

        labels_train = _labels_from(batch.y_train)
        labels_test = _labels_from(batch.y_test)

    return Episode(
        x_train,
        labels_train,
        np.expand_dims(batch.train_indices, 0),
        x_test,
        labels_test,
        np.expand_dims(batch.test_indices, 0),
        x_unlabel=x_unlabel,
        y_unlabel=y_unlabel,
        unlabel_indices=np.expand_dims(batch.unlabel_indices, 0),
        y_train_str=batch.y_train_str,
        y_test_str=batch.y_test_str)
def next_episode(self, within_category=False, catcode=None):
    """Sample a new episode.

    (1) Pick a random set of classes.
    (2) Pick a random partitioning into train, test, unlabeled.

    The first ``n_class`` sampled classes contribute train, test and
    unlabeled examples; the remaining ``n_distractor`` classes contribute
    unlabeled examples only.

    Args:
        within_category: Accepted for interface compatibility; not used
            by this implementation.
        catcode: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        An ``Episode`` built from the ``data`` / ``labels`` of the
        concatenated train, test and unlabeled partitions.

    Raises:
        AssertionError: If a concatenated partition fails
            ``self._check_shape``.
    """
    n_class = self.al_instance.n_class
    n_distractor = self.al_instance.n_distractor
    n_total = n_class + n_distractor

    # Materialise the keys once: dict views are not indexable on Python 3.
    class_keys = list(self.class_dict.keys())
    sel_classes = np.random.choice(
        range(len(class_keys)), size=n_total, replace=False)

    # One slot per selected class; every entry is None here.
    k_per_class = [None] * n_total

    total_train = None
    total_test = None
    total_unlbl = None

    for idx, cl in enumerate(sel_classes[:n_class]):
        train, test, unlbl = self._get_rand_partition(
            class_keys[cl], idx, k_per_class[idx])
        total_train = self._concat_or_identity(total_train, train)
        total_test = self._concat_or_identity(total_test, test)
        total_unlbl = self._concat_or_identity(total_unlbl, unlbl)

    for offset, cl in enumerate(sel_classes[n_class:]):
        # Distractor classes are numbered after the regular classes.
        idx = n_class + offset
        unlbl = self._get_rand_partition(
            class_keys[cl], idx, k_per_class[idx])
        total_unlbl = self._concat_or_identity(total_unlbl, unlbl)

    assert self._check_shape(total_train, n_class,
                             self.al_instance.k_train)
    assert self._check_shape(total_test, n_class, self.al_instance.k_test)
    assert self._check_shape(total_unlbl, n_total,
                             self.al_instance.k_unlbl)

    return Episode(
        x_train=total_train.data,
        y_train=total_train.labels,
        x_test=total_test.data,
        y_test=total_test.labels,
        x_unlabel=total_unlbl.data,
        y_unlabel=total_unlbl.labels)
def preprocess_batch(batch):
    """Add a leading batch axis to an episode and reorder image axes.

    When ``batch.x_train`` is 4-D, every array gets a new axis 0. For the
    image arrays the last axis of the resulting 5-D array is then moved to
    position 2 (presumably channels-last to channels-first -- confirm
    against the model's expected layout). The index arrays and string
    labels are forwarded unchanged.

    A batch whose ``x_train`` is not 4-D is returned unchanged, matching
    the behaviour of the other ``preprocess_batch`` variant.

    Args:
        batch: Episode-like object exposing ``x_train``, ``y_train``,
            ``x_test``, ``y_test``, ``x_unlabel``, the ``*_indices``
            attributes, ``y_train_str`` / ``y_test_str`` and optionally
            ``y_unlabel``.

    Returns:
        A new ``Episode`` for 4-D input, otherwise ``batch`` itself.
    """
    if len(batch.x_train.shape) != 4:
        # Nothing to expand: hand the batch back as is.
        return batch

    def _batched_images(images):
        # Add the leading axis, then move axis 4 in front of axes 2 and 3.
        # np.moveaxis(a, 4, 2) yields the same view as np.rollaxis(a, 4, 2).
        return np.moveaxis(np.expand_dims(images, 0), 4, 2)

    x_train = _batched_images(batch.x_train)
    y_train = np.expand_dims(batch.y_train, 0)
    x_test = _batched_images(batch.x_test)
    y_test = np.expand_dims(batch.y_test, 0)

    x_unlabel = None
    if batch.x_unlabel is not None:
        x_unlabel = _batched_images(batch.x_unlabel)

    # ``y_unlabel`` is optional on the incoming object.
    y_unlabel = getattr(batch, 'y_unlabel', None)
    if y_unlabel is not None:
        y_unlabel = np.expand_dims(y_unlabel, 0)

    return Episode(
        x_train,
        y_train,
        batch.train_indices,
        x_test,
        y_test,
        batch.test_indices,
        x_unlabel=x_unlabel,
        y_unlabel=y_unlabel,
        unlabel_indices=batch.unlabel_indices,
        y_train_str=batch.y_train_str,
        y_test_str=batch.y_test_str)
def next_episode(self, within_category=False, catcode=None):
    """Gets a new episode.

    Args:
        within_category: bool. Whether or not to restrict the episode's
            classes to belong to the same more general category (only
            applicable for datasets with ``self._category_labels``
            defined, e.g. JakeImageNet). If True, a random general
            category is chosen unless ``catcode`` is set.
        catcode: str (e.g. 'n02795169'). If not None, the classes chosen
            for this episode are restricted to synsets belonging to the
            more general category with this code.

    Returns:
        An ``Episode`` holding the support images / labels, the query
        images / labels, the unlabeled images, a 0/1 array marking which
        unlabeled slots belong to non-distractor classes, and the string
        labels of the support and query examples.

    Raises:
        AssertionError: If the category attributes are missing when a
            category restriction is requested, if the split is unknown,
            if a class has too few labeled examples, or if an unlabeled
            image is also used as a query image.
    """
    if within_category or catcode is not None:
        assert hasattr(self, "_category_labels")
        assert hasattr(self, "_category_label_str")
        if catcode is None:
            # Choose a category for this episode's classes.
            cat_idx = np.random.randint(len(self._category_label_str))
            # dict views are not indexable on Python 3.
            catcode = list(self._catcode_to_syncode.keys())[cat_idx]
        cat_synsets = self._catcode_to_syncode[catcode]
        cat_synsets_str = [self._syncode_to_str[code] for code in cat_synsets]
        label_str_arr = np.array(self._label_str)
        allowable_inds = [
            np.where(label_str_arr == name)[0] for name in cat_synsets_str
        ]
        class_seq = np.array(allowable_inds).reshape((-1))
    else:
        num_label_cls = len(self._label_str)
        class_seq = np.arange(num_label_cls)
    self._rnd.shuffle(class_seq)

    train_img_ids = []
    train_labels = []
    test_img_ids = []
    test_labels = []
    train_unlabel_img_ids = []
    non_distractor = []
    train_labels_str = []
    test_labels_str = []

    is_training = self._split in ["train", "trainval"]
    assert is_training or self._split in ["val", "test"]

    for ii in range(self._nway + self._num_distractor):
        cc = class_seq[ii]
        _ids = self._label_idict[cc]

        # Split the image IDs into labeled and unlabeled. Real lists are
        # needed because they are shuffled, measured and sliced below.
        _label_ids = [
            _id for _id in _ids if _id in self._label_split_idx_set
        ]
        _unlabel_ids = [
            _id for _id in _ids if _id not in self._label_split_idx_set
        ]
        self._rnd.shuffle(_label_ids)
        self._rnd.shuffle(_unlabel_ids)

        # Add support set and query set (not for distractors).
        if ii < self._nway:
            train_img_ids.extend(_label_ids[:self._nshot])

            # Use the rest of the labeled image as queries, if num_test = -1.
            QUERY_SIZE_LARGE_ERR_MSG = (
                "Query + reference should be less than labeled examples." +
                "Num labeled {} Num test {} Num shot {}".format(
                    len(_label_ids), self._num_test, self._nshot))
            assert self._nshot + self._num_test <= len(
                _label_ids), QUERY_SIZE_LARGE_ERR_MSG

            if self._num_test == -1:
                if is_training:
                    num_test = len(_label_ids) - self._nshot
                else:
                    num_test = (len(_label_ids) - self._nshot -
                                self._num_unlabel)
            else:
                num_test = self._num_test
                if is_training:
                    assert num_test <= len(_label_ids) - self._nshot
                else:
                    assert num_test <= (len(_label_ids) -
                                        self._num_unlabel - self._nshot)

            test_img_ids.extend(_label_ids[self._nshot:self._nshot + num_test])
            train_labels.extend([ii] * self._nshot)
            train_labels_str.extend([self._label_str[cc]] * self._nshot)
            test_labels.extend([ii] * num_test)
            test_labels_str.extend([self._label_str[cc]] * num_test)
            non_distractor.extend([1] * self._num_unlabel)
        else:
            non_distractor.extend([0] * self._num_unlabel)

        # Add unlabeled images here.
        if is_training:
            # Use labeled, unlabeled split here for refinement.
            train_unlabel_img_ids.extend(_unlabel_ids[:self._num_unlabel])
        else:
            # Copy test set for refinement.
            # This will only work if the test procedure is rolled out in a
            # sequence. For distractor classes ``num_test`` is the value
            # left over from the last non-distractor class.
            start = self._nshot + num_test
            train_unlabel_img_ids.extend(
                _label_ids[start:start + self._num_unlabel])

    train_img = self.get_images(train_img_ids) / 255.0
    train_unlabel_img = self.get_images(train_unlabel_img_ids) / 255.0
    test_img = self.get_images(test_img_ids) / 255.0
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    train_labels_str = np.array(train_labels_str)
    test_labels_str = np.array(test_labels_str)
    non_distractor = np.array(non_distractor)

    # An unlabeled image must never double as a query image.
    test_ids_set = set(test_img_ids)
    for _id in train_unlabel_img_ids:
        assert _id not in test_ids_set

    if self._shuffle_episode:
        # Shuffle the sequence order in an episode. Very important for RNN
        # based meta learners. Every array describing the same examples is
        # permuted with the same index so that labels stay aligned.
        train_idx = np.arange(train_img.shape[0])
        self._rnd.shuffle(train_idx)
        train_img = train_img[train_idx]
        train_labels = train_labels[train_idx]
        train_labels_str = train_labels_str[train_idx]

        train_unlabel_idx = np.arange(train_unlabel_img.shape[0])
        self._rnd.shuffle(train_unlabel_idx)
        train_unlabel_img = train_unlabel_img[train_unlabel_idx]
        # The mask can only follow the images when there is exactly one
        # entry per image (a class with too few images breaks that).
        if non_distractor.shape[0] == train_unlabel_idx.shape[0]:
            non_distractor = non_distractor[train_unlabel_idx]

        test_idx = np.arange(test_img.shape[0])
        self._rnd.shuffle(test_idx)
        test_img = test_img[test_idx]
        test_labels = test_labels[test_idx]
        test_labels_str = test_labels_str[test_idx]

    return Episode(
        train_img,
        train_labels,
        test_img,
        test_labels,
        x_unlabel=train_unlabel_img,
        y_unlabel=non_distractor,
        y_train_str=train_labels_str,
        y_test_str=test_labels_str)
def next(self, within_category=False, catcode=None):
    """Sample a new episode into pre-allocated arrays.

    (1) Pick a random set of classes.
    (2) Pick a random partitioning into train, test, unlabeled.

    The first ``n_class`` sampled classes fill the train, test and
    unlabeled arrays; the remaining ``n_distractor`` classes fill only the
    tail of the unlabeled array. ``y_unlabel`` has one entry per unlabeled
    image: 1 for images of the regular classes, 0 for distractor images.

    Args:
        within_category: Accepted for interface compatibility; not used
            by this implementation.
        catcode: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        An ``Episode`` with normalized images, integer labels, the sampled
        class indices as ``y_train_str`` / ``y_test_str`` and, for the
        'train_phase_train' split only, the selected class indices as
        ``y_sel`` (otherwise ``None``).
    """
    n_class = self.al_instance.n_class
    n_distractor = self.al_instance.n_distractor
    n_total = n_class + n_distractor
    k_train = self.al_instance.k_train
    k_test = self.al_instance.k_test
    k_unlbl = self.al_instance.k_unlbl

    # Materialise the keys once instead of on every loop iteration.
    class_keys = list(self.class_dict.keys())
    sel_classes = np.random.choice(
        range(len(class_keys)), size=n_total, replace=False)

    # One slot per selected class; every entry is None here.
    k_per_class = [None] * n_total

    # NOTE: the image size is hard-coded to 84x84x3.
    total_train = np.zeros([n_class * k_train, 84, 84, 3], dtype=np.float32)
    total_test = np.zeros([n_class * k_test, 84, 84, 3], dtype=np.float32)
    total_unlbl = np.zeros([n_total * k_unlbl, 84, 84, 3], dtype=np.float32)
    total_train_label = np.zeros([n_class * k_train], dtype=np.int64)
    total_test_label = np.zeros([n_class * k_test], dtype=np.int64)
    # One flag per unlabeled image, distractors included (they stay 0), so
    # that the flags line up with ``total_unlbl``.
    total_unlbl_label = np.zeros([n_total * k_unlbl], dtype=np.int64)
    y_train_str = []
    y_test_str = []

    for idx, cl in enumerate(sel_classes[:n_class]):
        train, test, unlbl = self._get_rand_partition(
            class_keys[cl], idx, k_per_class[idx])
        total_train[idx * k_train:(idx + 1) * k_train] = train
        total_test[idx * k_test:(idx + 1) * k_test] = test
        y_train_str.extend([cl] * k_train)
        y_test_str.extend([cl] * k_test)
        total_unlbl[idx * k_unlbl:(idx + 1) * k_unlbl] = unlbl
        total_train_label[idx * k_train:(idx + 1) * k_train] = idx
        total_test_label[idx * k_test:(idx + 1) * k_test] = idx
        total_unlbl_label[idx * k_unlbl:(idx + 1) * k_unlbl] = 1

    for offset, cl in enumerate(sel_classes[n_class:]):
        # Distractor classes are numbered after the regular classes.
        idx = n_class + offset
        unlbl = self._get_rand_partition(
            class_keys[cl], idx, k_per_class[idx])
        total_unlbl[idx * k_unlbl:(idx + 1) * k_unlbl] = unlbl

    if self._split == 'train_phase_train':
        y_sel = sel_classes[:n_class]
        # Run every train / test image through ``self._rnd_process``
        # (presumably random augmentation -- confirm where it is built).
        for jj in range(total_train.shape[0]):
            total_train[jj] = self._sess.run(
                self._rnd_process,
                feed_dict={self._rnd_process_plh: total_train[jj]})
        for jj in range(total_test.shape[0]):
            total_test[jj] = self._sess.run(
                self._rnd_process,
                feed_dict={self._rnd_process_plh: total_test[jj]})
    else:
        # No need to forbid here.
        y_sel = None

    total_train = self.normalize(total_train)
    total_unlbl = self.normalize(total_unlbl)
    total_test = self.normalize(total_test)

    return Episode(
        x_train=total_train,
        y_train=total_train_label,
        x_test=total_test,
        y_test=total_test_label,
        x_unlabel=total_unlbl,
        y_unlabel=total_unlbl_label,
        y_train_str=y_train_str,
        y_test_str=y_test_str,
        y_sel=y_sel)
def next_episode(self, within_category=False):
    """Gets a new episode.

    Args:
        within_category: bool. Whether or not to choose classes which all
            belong to the same more general category (only applicable for
            datasets with ``self._category_labels`` defined). It only
            takes effect when ``self._cat_way`` is not -1; then up to
            ``self._nway`` classes are drawn from each of up to
            ``self._cat_way`` randomly chosen categories.

    Returns:
        An ``Episode`` with support / query / unlabeled images scaled by
        1/255, their image ids, and labels stored as
        ``[class id, position in episode]`` pairs. Unlabeled slots of
        distractor classes are labeled ``[-1, -1]``.

    Side effects:
        Rebuilds and shuffles ``self.class_seq``.

    Raises:
        AssertionError: If the split is unknown, if a class has too few
            labeled examples, or if an unlabeled image is also used as a
            query image.
    """
    num_label_cls = len(self._label_str)
    if self._mode_ratio < 1.0:
        if self._train_modes:
            self.class_seq = [
                _id for _id in range(num_label_cls)
                if _id in self._class_train_set
            ]
        else:
            self.class_seq = [
                _id for _id in range(num_label_cls)
                if _id not in self._class_train_set
            ]
    else:
        self.class_seq = np.arange(num_label_cls)

    train_img_ids = []
    train_labels = []
    test_img_ids = []
    test_labels = []
    train_unlabel_img_ids = []
    non_distractor = []
    train_labels_str = []
    test_labels_str = []

    self._rnd.shuffle(self.class_seq)

    # Get a list of class indices (class_seq_i) which are within cat_way
    # (number of categories per episode) randomly selected categories.
    if within_category and self._cat_way != -1:
        assert hasattr(self, "_category_labels")
        category_labels = np.array(self._category_labels)
        cat_labels = np.unique(self._category_labels)
        num_cats = len(cat_labels)
        cat_idxs = self._rnd.choice(
            cat_labels, min(self._cat_way, num_cats), replace=False)
        # Set lookup instead of scanning class_seq for every candidate.
        eligible = set(self.class_seq)
        allowable_inds = []
        for cat_idx in cat_idxs:
            current_inds = np.where(category_labels == cat_idx)[0]
            filtered_inds = [_id for _id in current_inds if _id in eligible]
            self._rnd.shuffle(filtered_inds)
            allowable_inds.extend(
                filtered_inds[0:min(self._nway, len(filtered_inds))])
        class_seq_i = np.array(allowable_inds, dtype=np.int64)
        self._rnd.shuffle(class_seq_i)
        total_way = len(class_seq_i)
        # NOTE(review): class_seq_i holds exactly total_way entries, so a
        # non-zero self._num_distractor indexes past its end in the loop
        # below -- confirm distractors are never combined with this mode.
    else:
        total_way = self._nway
        class_seq_i = self.class_seq

    is_training = self._split in ["train", "trainval"]
    assert is_training or self._split in ["val", "test"]

    # Outside training, the unlabeled images are taken from the labeled
    # pool, so they are reserved together with the support images.
    if not is_training:
        train_idx = self._nshot + self._num_unlabel
    else:
        train_idx = self._nshot

    # Source of the string labels; does not change between classes.
    if hasattr(self,
               "_category_labels") and self._category_labels is not None:
        label_strs = self._category_labels
    else:
        label_strs = self._label_str

    for ii in range(total_way + self._num_distractor):
        cc = class_seq_i[ii]
        _ids = self._label_idict[cc]

        # Split the image IDs into labeled and unlabeled.
        _label_ids = [
            _id for _id in _ids if _id in self._label_split_idx_set
        ]
        _unlabel_ids = [
            _id for _id in _ids if _id not in self._label_split_idx_set
        ]
        self._rnd.shuffle(_label_ids)
        self._rnd.shuffle(_unlabel_ids)

        _label_train_ids = _label_ids[:train_idx]
        _label_test_ids = _label_ids[train_idx:]
        self._rnd.shuffle(_label_train_ids)
        self._rnd.shuffle(_label_test_ids)

        # Label pair: [class id, position of the class in this episode].
        class_idx = [cc, ii]

        if self._num_test == -1:
            if is_training:
                num_test = len(_label_test_ids)
            else:
                num_test = len(_label_test_ids) - self._num_unlabel - 1
        else:
            num_test = self._num_test
            if is_training:
                assert num_test <= len(_label_test_ids)
            else:
                assert num_test <= len(_label_test_ids) - self._num_unlabel

        # Add support set and query set (not for distractors).
        if ii < total_way:
            train_img_ids.extend(_label_train_ids[:self._nshot])

            # Use the rest of the labeled image as queries, if num_test = -1.
            QUERY_SIZE_LARGE_ERR_MSG = (
                "Query + reference should be less than labeled examples." +
                "Num labeled {} Num test {} Num shot {}".format(
                    len(_label_ids), self._num_test, self._nshot))
            assert self._nshot + self._num_test <= len(
                _label_ids), QUERY_SIZE_LARGE_ERR_MSG

            test_img_ids.extend(_label_test_ids[:num_test])
            train_labels.extend([class_idx] * self._nshot)
            train_labels_str.extend([label_strs[cc]] * self._nshot)
            test_labels.extend([class_idx] * num_test)
            test_labels_str.extend([label_strs[cc]] * num_test)
            non_distractor.extend([class_idx] * self._num_unlabel)
        else:
            non_distractor.extend([[-1, -1]] * self._num_unlabel)

        # Add unlabeled images here.
        if is_training:
            # Use labeled, unlabeled split here for refinement.
            train_unlabel_img_ids.extend(_unlabel_ids[:self._num_unlabel])
        else:
            train_unlabel_img_ids.extend(
                _label_train_ids[self._nshot:self._nshot +
                                 self._num_unlabel])

    train_img = self.get_images(train_img_ids) / 255.0
    train_unlabel_img = self.get_images(train_unlabel_img_ids) / 255.0
    test_img = self.get_images(test_img_ids) / 255.0
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)

    if hasattr(self, "_category_labels"):
        # Pair every string label with the value produced for it by
        # self.episodic_labels, as two columns.
        train_labels_str = np.hstack(
            (np.array(train_labels_str)[:, None],
             np.array(self.episodic_labels(train_labels_str))[:, None]))
        test_labels_str = np.hstack(
            (np.array(test_labels_str)[:, None],
             np.array(self.episodic_labels(test_labels_str))[:, None]))
    else:
        train_labels_str = np.array(train_labels_str)
        test_labels_str = np.array(test_labels_str)

    non_distractor = np.array(non_distractor)

    # An unlabeled image must never double as a query image.
    test_ids_set = set(test_img_ids)
    for _id in train_unlabel_img_ids:
        assert _id not in test_ids_set

    return Episode(
        x_train=train_img,
        train_indices=train_img_ids,
        y_train=train_labels,
        x_test=test_img,
        test_indices=test_img_ids,
        y_test=test_labels,
        x_unlabel=train_unlabel_img,
        y_unlabel=non_distractor,
        unlabel_indices=train_unlabel_img_ids,
        y_train_str=train_labels_str,
        y_test_str=test_labels_str)