Esempio n. 1
0
    def split_superinstance(self, si, k):
        """Split the super-instance ``si`` into child super-instances.

        The rows of ``self.data`` selected by ``si.indices`` are clustered with
        ``kshape``. Every resulting cluster that contains at least one index
        from ``self.train_indices`` becomes a new ``SuperInstance_kShape`` with
        ``si`` as its parent. Clusters without any training index are not kept
        as separate super-instances: their indices are appended to the child
        whose SBD centroid is closest to the cluster's own centroid.

        :param si: super-instance to split; its ``indices`` are read and its
            ``children`` attribute is overwritten with the returned list
        :param k: forwarded to ``kshape`` (presumably the number of clusters
            -- confirm against the ``kshape`` implementation)
        :return: list of the newly created child super-instances
        :raises ValueError: if a cluster without training instances has no
            child it can be merged into
        """
        # the actual splitting
        clusters = kshape(self.data[si.indices, :], k)

        # hash the training indices once so every membership test below is
        # O(1) instead of a scan over the whole container
        train_lookup = set(self.train_indices)

        # all returned super-instances must contain at least one training
        # instance; the remaining clusters are merged into those afterwards
        training = []
        no_training = []

        for new_si_centroid, new_si_idx in clusters:
            # go from super instance indices to global ones
            cur_indices = [si.indices[idx] for idx in new_si_idx]

            if any(x in train_lookup for x in cur_indices):
                training.append(SuperInstance_kShape(self.data, cur_indices, self.train_indices, new_si_centroid, si))
            else:
                no_training.append((cur_indices, new_si_centroid))

        for indices, centroid in no_training:
            # sets of indices without a training point are merged with their closest super-instance
            # closeness is based on the SBD centroid
            closest_train = None
            closest_train_dist = np.inf
            for training_si in training:
                cur_dist, _ = _sbd(training_si.sbd_centroid, centroid)
                # strict comparison: the first of several equally close
                # candidates wins
                if cur_dist < closest_train_dist:
                    closest_train_dist = cur_dist
                    closest_train = training_si

            if closest_train is None:
                # previously this surfaced as an AttributeError on None
                raise ValueError(
                    "cannot merge a cluster without training instances: "
                    "no super-instance containing a training instance is available"
                )
            closest_train.indices.extend(indices)

        si.children = training

        return training
Esempio n. 2
0
def get_prototype(A, indices, prototype):
    """Return the entry of ``indices`` whose row of ``A`` is nearest to ``prototype``.

    Nearness is the first value returned by ``_sbd(A[idx, :], prototype)``.
    When several candidates are equally near, the one encountered first is
    kept. ``None`` is returned if ``indices`` is empty or if no distance
    compares strictly below infinity.
    """
    best_idx = None
    best_dist = np.inf

    for candidate in indices:
        dist, _ = _sbd(A[candidate, :], prototype)
        # only a strictly smaller distance replaces the current best
        if not dist < best_dist:
            continue
        best_idx = candidate
        best_dist = dist

    return best_idx
Esempio n. 3
0
 def distance_between_instances(x1, x2):
     """Return the distance between ``x1`` and ``x2`` as computed by ``_sbd``.

     ``_sbd`` yields a pair; only its first element (the distance) is
     returned, the second element is discarded.
     """
     distance, _ignored = _sbd(x1, x2)
     return distance
Esempio n. 4
0
 def distance_to(self, other_superinstance):
     """Return the ``_sbd`` distance between this and another super-instance.

     The distance is measured between the two ``sbd_centroid`` attributes;
     the second element of the pair returned by ``_sbd`` is discarded.
     """
     own_centroid = self.sbd_centroid
     other_centroid = other_superinstance.sbd_centroid
     distance, _ignored = _sbd(own_centroid, other_centroid)
     return distance