def build_tree(self):
    if self.name not in IMAGE_DS:
        raise ValueError('Unknown image dataset: {}'.format(self.name))
    ds_class = IMAGE_DS[self.name]
    common_params = dict(root=self.data_path, download=True)
    if self.name in ['svhn', 'dtd', 'aircraft']:
        self.trainset = ds_class(split='train', **common_params)
        self.testset = ds_class(split='test', **common_params)
    else:
        self.trainset = ds_class(train=True, **common_params)
        self.testset = ds_class(train=False, **common_params)
    self.train_samples, self.train_targets = self._format_ds(self.trainset)
    self.test_samples, self.test_targets = self._format_ds(self.testset)
    self.height, self.width = self.train_samples.size()[-2:]

    taxonomy = TAXONOMY[self.name]
    concepts = self._build_tree(taxonomy)

    root_concept = ComposedConcept(concepts, id=self.name)
    self.tree.add_node(root_concept)
    for sub_concept in concepts:
        self.tree.add_edge(root_concept, sub_concept)

    # The raw datasets are no longer needed once the leaf concepts hold their samples
    del self.trainset, self.testset, \
        self.train_samples, self.test_samples, \
        self.train_targets, self.test_targets
    return root_concept
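# Hedged usage sketch (not from the original code): shows the split-vs-train
# keyword dispatch that build_tree above relies on. The dict and the helper
# name `load_splits` below are made up for illustration; only torchvision's
# MNIST and SVHN classes are assumed to exist with these signatures.
from torchvision.datasets import MNIST, SVHN

_EXAMPLE_IMAGE_DS = {'mnist': MNIST, 'svhn': SVHN}
_SPLIT_KW_DATASETS = {'svhn', 'dtd', 'aircraft'}

def load_splits(name, root, download=True):
    ds_class = _EXAMPLE_IMAGE_DS[name]
    common = dict(root=root, download=download)
    if name in _SPLIT_KW_DATASETS:
        # These datasets expose their partitions through a `split=` argument
        return ds_class(split='train', **common), ds_class(split='test', **common)
    # MNIST/CIFAR-style datasets use a boolean `train=` flag instead
    return ds_class(train=True, **common), ds_class(train=False, **common)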
def build_tree(self):
    train = MNIST(download=True, root=os.environ['HOME'] + '/data', train=True)
    test = MNIST(download=True, root=os.environ['HOME'] + '/data', train=False)
    assert train.class_to_idx == test.class_to_idx

    for digit, idx in train.class_to_idx.items():
        # Select all train/test samples belonging to this digit
        train_mask = train.targets == idx
        train_samples = train.data[train_mask]
        test_mask = test.targets == idx
        test_samples = test.data[test_mask]
        concept = DigitConcept(id=digit, data=(train_samples, test_samples))
        self.tree.add_node(concept)
        self.all_nodes.add(concept)
        self.leaf_nodes.add(concept)

    root_concept = ComposedConcept(self.leaf_concepts, id='all_digits')
    for concept in self.leaf_concepts:
        self.tree.add_edge(root_concept, concept)
    return root_concept
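# Hedged sketch of the boolean-mask selection used per digit above. The toy
# tensors stand in for MNIST's `data`/`targets` so the pattern runs without
# any download; `DigitConcept` itself is not reproduced here.
import torch

targets = torch.tensor([0, 1, 0, 2, 1, 0])
data = torch.arange(6 * 4).reshape(6, 2, 2)   # six fake 2x2 "images"

per_class = {}
for idx in targets.unique().tolist():
    mask = targets == idx          # one boolean per sample
    per_class[idx] = data[mask]    # all samples of that class

assert per_class[0].shape[0] == 3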
def _build_tree(self, current_level_concepts):
    new_concepts = []
    if isinstance(current_level_concepts, dict):
        # Not yet at the lowest level
        for name, lower_concepts in current_level_concepts.items():
            concepts = self._build_tree(lower_concepts)
            concept_name = '{} {}'.format(self.name, name)
            new_concept = ComposedConcept(concepts=concepts, id=concept_name)
            self.tree.add_node(new_concept)
            self.all_nodes.add(new_concept)
            for c in concepts:
                self.tree.add_edge(new_concept, c)
            new_concepts.append(new_concept)
    elif isinstance(current_level_concepts, list):
        # Adding lowest level concepts
        for c in current_level_concepts:
            samples = self._get_samples(c)
            concept_name = '{} {}'.format(self.name, c)
            concept = ImageConcept(id=concept_name, samples=samples,
                                   split_rnd=self.split_rnd,
                                   val_size=self.val_size,
                                   val_ratio=self.val_ratio)
            self.tree.add_node(concept)
            self.all_nodes.add(concept)
            self.leaf_nodes.add(concept)
            new_concepts.append(concept)
    else:
        raise NotImplementedError()
    return new_concepts
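# Hedged sketch of the dict/list recursion in _build_tree above: dict nodes
# become internal concepts, list entries become leaves. The taxonomy literal
# and node labels below are invented; real Concept objects are replaced by
# plain strings in a networkx DiGraph.
import networkx as nx

def walk_taxonomy(tree, node, prefix=''):
    children = []
    if isinstance(node, dict):
        for name, sub in node.items():
            label = prefix + name
            tree.add_node(label)
            for child in walk_taxonomy(tree, sub, prefix):
                tree.add_edge(label, child)     # internal node -> its children
            children.append(label)
    elif isinstance(node, list):
        for name in node:
            tree.add_node(prefix + name)        # leaf concept
            children.append(prefix + name)
    else:
        raise NotImplementedError()
    return children

tree = nx.DiGraph()
roots = walk_taxonomy(tree, {'vehicles': ['car', 'truck'],
                             'animals': {'pets': ['cat', 'dog']}})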
def _generate_samples_from_descr(categories, attributes, n_samples_per_class,
                                 augment):
    use_cat_id, attributes = attributes
    assert use_cat_id and not attributes, \
        "usage of attributes isn't supported in v1."

    samples = []
    labels = []
    for i, cat_concepts in enumerate(categories):
        mixture = ComposedConcept(cat_concepts, id=None)
        cat_samples = []
        cat_labels = []
        for s_id, n in enumerate(n_samples_per_class):
            split_samples, split_attrs = mixture._get_samples(n, attributes,
                                                              split_id=s_id)
            if s_id in augment:
                split_samples = augment_samples(split_samples)
            split_labels = torch.Tensor().long()
            cat_id = torch.tensor([i]).expand(split_samples.shape[0], 1)
            split_labels = torch.cat([split_labels, cat_id], dim=1)
            cat_samples.append(split_samples)
            cat_labels.append(split_labels)
        samples.append(cat_samples)
        labels.append(cat_labels)

    if torch.is_tensor(samples[0][0]):
        cat_func = torch.cat
    else:
        cat_func = np.concatenate
    samples = (cat_func(split) for split in zip(*samples))
    labels = (torch.cat(split) for split in zip(*labels))
    return samples, labels
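# Hedged sketch of the final zip(*...)/concat step above: per-category lists
# of per-split tensors are transposed so each split gets the samples of every
# category concatenated together. Toy tensors only; shapes are arbitrary.
import torch

samples = [[torch.zeros(2, 3), torch.zeros(1, 3)],   # category 0: two splits
           [torch.ones(4, 3), torch.ones(5, 3)]]     # category 1: two splits

per_split = [torch.cat(split) for split in zip(*samples)]
assert [s.shape[0] for s in per_split] == [6, 6]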
def _build_tree(self, n_levels, n_children, n_dims, low, high, scale_fact,
                cov_scale, mean, parent_name):
    if isinstance(low, Number):
        low = torch.ones(n_dims) * low
    if isinstance(high, Number):
        high = torch.ones(n_dims) * high

    # Draw this level's cluster means uniformly around the parent mean
    cluster_means = uniform.Uniform(low + mean, high + mean).sample((n_children[0],))
    pref = '\t' * (self.n_levels - n_levels)
    concepts = []
    for i, cluster_mean in enumerate(cluster_means):
        logger.debug(pref + 'New cluster centered on {}'.format(cluster_mean))
        concept_name = '{}{}'.format(parent_name, i)
        if n_levels > 1:
            lower_concepts = self._build_tree(n_levels - 1, n_children[1:],
                                              n_dims,
                                              low=low * scale_fact,
                                              high=high * scale_fact,
                                              scale_fact=scale_fact,
                                              cov_scale=cov_scale,
                                              mean=cluster_mean,
                                              parent_name=concept_name)
            concept = ComposedConcept(lower_concepts, cluster_mean=cluster_mean,
                                      id=concept_name)
            self.tree.add_node(concept)
            for c in lower_concepts:
                self.tree.add_edge(concept, c)
            self.all_nodes.add(concept)
        else:
            concept = AtomicConcept(cluster_mean,
                                    torch.eye(n_dims) * cov_scale,
                                    concept_name)
            self.tree.add_node(concept)
            self.all_nodes.add(concept)
            self.leaf_nodes.add(concept)
        concepts.append(concept)
    return concepts
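# Hedged sketch of the hierarchical sampling idea in _build_tree above: each
# level draws its cluster means uniformly around the parent mean, shrinking
# the sampling box by `scale_fact` at every level. Concept classes are
# replaced by plain (mean, children) tuples; all names here are illustrative.
import torch
from torch.distributions import uniform

def sample_mean_tree(n_levels, n_children, low, high, scale_fact, mean):
    means = uniform.Uniform(low + mean, high + mean).sample((n_children,))
    nodes = []
    for cluster_mean in means:
        children = []
        if n_levels > 1:
            children = sample_mean_tree(n_levels - 1, n_children,
                                        low * scale_fact, high * scale_fact,
                                        scale_fact, cluster_mean)
        nodes.append((cluster_mean, children))
    return nodes

toy_tree = sample_mean_tree(n_levels=2, n_children=3,
                            low=-torch.ones(2), high=torch.ones(2),
                            scale_fact=0.25, mean=torch.zeros(2))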
def build_tree(self):
    # if self.name == 'cifar10':
    #     ds_class = CIFAR10
    # elif self.name == 'cifar100':
    #     ds_class = CIFAR100
    # elif self.name == 'mnist':
    #     ds_class = MNIST
    # else:
    #     raise ValueError('Unknown image dataset: {}'.format(self.name))
    #
    # self.trainset = ds_class(root=self.data_path, train=True, download=True)
    # self.testset = ds_class(root=self.data_path, train=False, download=True)
    # self.train_samples, self.train_targets = self._format_ds(self.trainset)
    # self.test_samples, self.test_targets = self._format_ds(self.testset)
    # self.height, self.width = self.train_samples.size()[2:]

    taxonomy = TAXONOMY[self.name]
    tasks = self._build_tree(taxonomy)

    vdd_task = ComposedConcept(tasks, id=self.name)
    self.tree.add_node(vdd_task)
    for task in tasks:
        self.tree.add_edge(vdd_task, task)

    # del self.trainset, self.testset, \
    #     self.train_samples, self.test_samples, \
    #     self.train_targets, self.test_targets
    return vdd_task
def build_tree(self):
    concepts = self._build_tree(self.n_levels, self.n_children,
                                self.intrinsic_dims, self.low_bound,
                                self.high_bound, self.scale_fact,
                                self.cov_scale, self.mean, self.name)
    concept = ComposedConcept(concepts, cluster_mean=self.mean, id=self.name)
    self.tree.add_node(concept)
    for c in concepts:
        self.tree.add_edge(concept, c)
    return concept
def _generate_samples_from_descr(categories, attributes, n_samples_per_class,
                                 augment):
    use_cat_id, attributes = attributes
    assert use_cat_id or attributes, 'Each task should at least use the ' \
                                     'category id or an attribute as labels'
    if not use_cat_id:
        # Regroup the concepts by the value of the selected attribute
        all_concepts = np.array(
            [concept for cat in categories for concept in cat])
        all_attrs = np.array([c.attrs for c in all_concepts])
        selected_attr = all_attrs[:, attributes[0]]
        categories = [tuple(all_concepts[selected_attr == val])
                      for val in np.unique(selected_attr)]

    if use_cat_id or isinstance(all_concepts[0], AtomicConcept):
        samples = []
        labels = []
        for i, cat_concepts in enumerate(categories):
            mixture = ComposedConcept(cat_concepts, id=None)
            cat_samples = []
            cat_labels = []
            for s_id, n in enumerate(n_samples_per_class):
                split_samples, split_attrs = mixture._get_samples(
                    n, attributes, split_id=s_id)
                if s_id in augment:
                    split_samples = augment_samples(split_samples)
                split_labels = torch.Tensor().long()
                if use_cat_id:
                    cat_id = torch.tensor([i]).expand(split_samples.shape[0], 1)
                    split_labels = torch.cat([split_labels, cat_id], dim=1)
                if attributes:
                    raise NotImplementedError("Attrs aren't supported anymore")
                    split_labels = torch.cat([split_labels, split_attrs], dim=1)
                cat_samples.append(split_samples)
                cat_labels.append(split_labels)
            samples.append(cat_samples)
            labels.append(cat_labels)
        if torch.is_tensor(samples[0][0]):
            cat_func = torch.cat
        else:
            cat_func = np.concatenate
        samples = (cat_func(split) for split in zip(*samples))
        labels = (torch.cat(split) for split in zip(*labels))
    else:
        # Grouping the concepts by attribute value to create the categories
        all_concepts = np.array(
            [concept for cat in categories for concept in cat])
        samples, labels = get_samples_using_attrs(all_concepts, attributes,
                                                  n_samples_per_class)
    return samples, labels
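# Hedged sketch of the regrouping in the `not use_cat_id` branch above: leaf
# concepts are flattened, then re-bucketed by the value of one attribute, and
# each bucket becomes a category. The `Leaf` class is a stand-in for the real
# concept objects and only carries the `attrs` field the branch relies on.
import numpy as np

class Leaf:
    def __init__(self, name, attrs):
        self.name, self.attrs = name, np.asarray(attrs)

categories = [(Leaf('a', [0, 1]), Leaf('b', [1, 1])),
              (Leaf('c', [0, 0]), Leaf('d', [1, 0]))]
attribute = 0   # index of the attribute used as label

all_concepts = np.array([c for cat in categories for c in cat])
selected_attr = np.array([c.attrs for c in all_concepts])[:, attribute]
regrouped = [tuple(all_concepts[selected_attr == val])
             for val in np.unique(selected_attr)]

assert [len(group) for group in regrouped] == [2, 2]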
def build_tree(self):
    child_concepts = [child.root_node for child in self.children]
    root_concept = ComposedConcept(child_concepts, id=self.name)
    self.tree.add_node(root_concept)
    for child in self.children:
        self.tree = nx.compose(self.tree, child.tree)
        self.tree.add_edge(root_concept, child.root_node)
        self.leaf_nodes = self.leaf_nodes.union(child.leaf_nodes)
        self.all_nodes = self.all_nodes.union(child.all_nodes)
    return root_concept
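# Hedged sketch of the nx.compose pattern above: each child tree is merged
# into the parent graph and its root attached under a new root node. Plain
# string labels replace the Concept objects of the original code.
import networkx as nx

child_a = nx.DiGraph([('a', 'a0'), ('a', 'a1')])
child_b = nx.DiGraph([('b', 'b0')])

tree = nx.DiGraph()
tree.add_node('root')
for child_tree, child_root in [(child_a, 'a'), (child_b, 'b')]:
    tree = nx.compose(tree, child_tree)   # union of nodes and edges
    tree.add_edge('root', child_root)     # attach the child's root under ours

assert set(tree.successors('root')) == {'a', 'b'}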
def _build_tree(self, current_level_concepts):
    all_task_concepts = []
    for task, classes in current_level_concepts.items():
        task_concepts = self.add_task(task)
        new_concept = ComposedConcept(concepts=task_concepts, id=task)
        self.tree.add_node(new_concept)
        self.all_nodes.add(new_concept)
        for c in task_concepts:
            self.tree.add_edge(new_concept, c)
        all_task_concepts.append(new_concept)
    return all_task_concepts
def _generate_samples_from_descr(categories, attributes, n_samples_per_class,
                                 augment, rnd):
    use_cat_id, attributes = attributes
    assert use_cat_id and not attributes, \
        "usage of attributes isn't supported in v1."

    samples = []
    labels = []
    for i, cat_concepts in enumerate(categories):
        mixture = ComposedConcept(cat_concepts, id=None)
        cat_samples = []
        cat_labels = []
        for s_id, n in enumerate(n_samples_per_class):
            if isinstance(n, list):
                n_samples, n_labeled = n[0], n[1]
            else:
                n_samples, n_labeled = n, n
            split_samples, split_attrs = mixture._get_samples(n_samples,
                                                              attributes,
                                                              split_id=s_id,
                                                              rng=rnd)
            if s_id in augment:
                split_samples = augment_samples(split_samples)
            split_labels = torch.Tensor().long()
            cat_id = torch.tensor([i]).expand(split_samples.shape[0], 1)
            split_labels = torch.cat([split_labels, cat_id], dim=1)

            # Create unlabeled samples
            n_unlabeled = len(split_samples) - n_labeled
            if n_unlabeled > 0:
                idx_unlabeled = rnd.choice(list(range(len(split_samples))),
                                           n_unlabeled, replace=False)
                # Mark unlabeled samples with -1
                split_labels[idx_unlabeled] = -1

            cat_samples.append(split_samples)
            cat_labels.append(split_labels)
        samples.append(cat_samples)
        labels.append(cat_labels)

    if torch.is_tensor(samples[0][0]):
        cat_func = torch.cat
    else:
        cat_func = np.concatenate
    samples = (cat_func(split) for split in zip(*samples))
    labels = (torch.cat(split) for split in zip(*labels))
    return samples, labels
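# Hedged sketch of the semi-supervised labelling step above: every sample
# first receives its category id, then a random subset of rows is overwritten
# with -1 to mark it as unlabeled. Toy tensors only; `rnd` is assumed to be a
# numpy RandomState as in the function above.
import numpy as np
import torch

rnd = np.random.RandomState(0)
split_samples = torch.randn(10, 3)
cat_id, n_labeled = 2, 4

split_labels = torch.full((split_samples.shape[0], 1), cat_id)
n_unlabeled = len(split_samples) - n_labeled
if n_unlabeled > 0:
    idx_unlabeled = rnd.choice(len(split_samples), n_unlabeled, replace=False)
    split_labels[torch.as_tensor(idx_unlabeled)] = -1   # -1 marks unlabeled rows

assert (split_labels == -1).sum().item() == n_unlabeled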
def build_tree(self):
    # if self.name == 'cifar10':
    #     ds_class = CIFAR10
    # elif self.name == 'cifar100':
    #     ds_class = CIFAR100
    # elif self.name == 'mnist':
    #     ds_class = MNIST
    # elif self.name == 'fashion-mnist':
    #     ds_class = FashionMNIST
    # elif self.name == 'svhn':
    #     ds_class = SVHN
    # else:
    #     raise ValueError('Unknown image dataset: {}'.format(self.name))
    # common_params = dict(root=self.data_path, download=True)
    # if self.name == 'svhn':
    #     self.trainset = ds_class(split='train', **common_params)
    #     self.testset = ds_class(split='test', **common_params)
    # else:
    #     self.trainset = ds_class(train=True, **common_params)
    #     self.testset = ds_class(train=False, **common_params)
    # self.train_samples, self.train_targets = self._format_ds(self.trainset)
    # self.test_samples, self.test_targets = self._format_ds(self.testset)
    # self.height, self.width = self.train_samples.size()[2:]
    #
    # taxonomy = TAXONOMY[self.name]
    #
    # concepts = self._build_tree(taxonomy)

    child_concepts = [child.root_node for child in self.children]
    root_concept = ComposedConcept(child_concepts, id=self.name)
    self.tree.add_node(root_concept)
    for child in self.children:
        self.tree = nx.compose(self.tree, child.tree)
        self.tree.add_edge(root_concept, child.root_node)
        self.leaf_nodes = self.leaf_nodes.union(child.leaf_nodes)
        self.all_nodes = self.all_nodes.union(child.all_nodes)

    # del self.trainset, self.testset, \
    #     self.train_samples, self.test_samples, \
    #     self.train_targets, self.test_targets
    return root_concept