def split_superinstance(self, si, k):
    """Split super-instance ``si`` by clustering its instances with ``kshape``.

    The rows of ``self.data`` selected by ``si.indices`` are clustered with
    ``kshape(..., k)``. Every resulting cluster that contains at least one
    index from ``self.train_indices`` becomes a new ``SuperInstance_kShape``
    whose parent is ``si``. A cluster without any training instance is
    merged into the new super-instance whose ``sbd_centroid`` is closest
    (first value returned by ``_sbd``) to that cluster's centroid.

    Args:
        si: the super-instance to split; its ``children`` attribute is set
            to the returned list.
        k: the cluster count handed to ``kshape``.

    Returns:
        The list of newly created super-instances.

    Raises:
        ValueError: if a cluster without training instances has no
            super-instance it can be merged into.
    """
    # the actual splitting
    pred = kshape(self.data[si.indices, :], k)

    # Built once so every membership test below is a hash lookup instead of
    # a scan over ``self.train_indices``.
    train_index_set = set(self.train_indices)

    # making sure that all of the super-instances contain at least one
    # training instance; super-instances without a training instance are
    # merged with the closest one that does contain a training instance
    training = []
    no_training = []
    for new_si_centroid, new_si_idx in pred:
        # go from super-instance-local indices to global ones
        cur_indices = [si.indices[idx] for idx in new_si_idx]
        if any(x in train_index_set for x in cur_indices):
            training.append(
                SuperInstance_kShape(
                    self.data, cur_indices, self.train_indices, new_si_centroid, si
                )
            )
        else:
            no_training.append((cur_indices, new_si_centroid))

    for indices, centroid in no_training:
        # sets of indices without a training point are merged with their
        # closest super-instance; closeness is based on the SBD centroid
        closest_train = None
        closest_train_dist = np.inf
        for training_si in training:
            cur_dist, _ = _sbd(training_si.sbd_centroid, centroid)
            if cur_dist < closest_train_dist:
                closest_train_dist = cur_dist
                closest_train = training_si

        if closest_train is None:
            # Either no cluster contained a training instance, or no distance
            # compared below infinity; fail with a clear message instead of
            # an AttributeError on None.
            raise ValueError(
                "cannot merge a cluster without training instances: "
                "no super-instance with a training instance is available"
            )
        closest_train.indices.extend(indices)

    si.children = training
    return training
def get_prototype(A, indices, prototype):
    """Return the entry of ``indices`` whose row in ``A`` is nearest to ``prototype``.

    Nearness is the first value returned by ``_sbd(A[idx, :], prototype)``.
    On ties the earliest index wins. ``None`` is returned when no index
    yields a distance strictly below infinity (for example when ``indices``
    is empty).
    """
    best_idx = None
    best_dist = np.inf
    for candidate in indices:
        dist, _ = _sbd(A[candidate, :], prototype)
        # Only a strictly smaller distance replaces the current best.
        if not (dist < best_dist):
            continue
        best_dist = dist
        best_idx = candidate
    return best_idx
def distance_between_instances(x1, x2):
    """Return the distance between ``x1`` and ``x2`` as computed by ``_sbd``.

    ``_sbd`` returns a pair; only its first element (the distance) is kept.
    """
    distance, _unused = _sbd(x1, x2)
    return distance
def distance_to(self, other_superinstance):
    """Return the ``_sbd`` distance between this centroid and the other's.

    Both operands are the ``sbd_centroid`` attributes of the respective
    super-instances; the second element of the ``_sbd`` result is discarded.
    """
    own_centroid = self.sbd_centroid
    other_centroid = other_superinstance.sbd_centroid
    distance, _unused = _sbd(own_centroid, other_centroid)
    return distance