Ejemplo n.º 1
0
 def mahalanobis_distance(self, point):
     """Return the normalized distance from ``point`` to this cluster's centroid.

     Cached statistics are refreshed first when ``self.stale`` is set.
     Index 0 of ``point`` is skipped; only indices ``1 .. self.D - 1``
     contribute.  Each per-dimension offset is divided by the matching
     entry of ``self.variance`` before being squared and accumulated.

     With plain Python numbers a zero entry in ``self.variance`` makes the
     division raise ``ZeroDivisionError``.
     """
     if self.stale:
         self.update_statistics()
     mean = self.centroid
     spread = self.variance
     total = 0
     for dim in range(1, self.D):
         scaled_offset = (point[dim] - mean[dim]) / spread[dim]
         total = total + scaled_offset**2
     return math.sqrt(total)
Ejemplo n.º 2
0
    def merge_CS(self):
        """Merge clusters in ``self.CS_set`` that lie close to each other.

        Every pair ``(s, t)`` with ``s < t`` is scored by the distance from
        cluster ``s``'s centroid to cluster ``t`` (via
        ``mahalanobis_distance``).  Pairs are visited from the closest
        upwards; while the distance is strictly below the threshold,
        cluster ``s`` is scheduled to be absorbed into cluster ``t`` (each
        cluster is absorbed at most once).  Chains such as ``a -> b -> c``
        are flattened so that ``a`` and ``b`` are both merged into ``c``.
        Finally ``merge_Cluster`` is called on each destination for each of
        its sources and the absorbed clusters are dropped from
        ``self.CS_set``.

        The surviving clusters are kept as the same objects (no copies are
        made); ``self.CS_set`` is rebound to a new list.  Returns ``None``.
        """
        clusters = self.CS_set
        if len(clusters) == 0:
            return

        alpha = 1  # self.alpha
        # NOTE(review): the threshold here uses D, whereas find_nearest_CS /
        # find_nearest_DS use D - 1 -- confirm whether this is intentional.
        threshold = alpha * math.sqrt(clusters[0].D)  # a small alpha here

        # Centroids are read directly below, so refresh every cluster first.
        for cluster in clusters:
            cluster.update_statistics()

        scored_pairs = [
            (s, t, clusters[t].mahalanobis_distance(clusters[s].centroid))
            for s, t in itertools.combinations(range(len(clusters)), 2)
        ]
        # Closest pairs first; ties are broken by the index pair.
        scored_pairs.sort(key=lambda item: (item[2], item[0], item[1]))

        absorbed_into = {}  # source index -> destination index
        merge_plan = {}  # destination index -> list of source indices
        for s, t, dis in scored_pairs:
            if dis > threshold:
                break  # sorted ascending: nothing closer remains
            if dis < threshold and s not in absorbed_into:
                absorbed_into[s] = t
                merge_plan.setdefault(t, []).append(s)

        # Flatten chains.  A source index is always smaller than its
        # destination, so walking upwards folds each cluster's sources into
        # its destination before that destination itself is visited.
        for index in range(len(clusters)):
            if index in merge_plan and index in absorbed_into:
                merge_plan[absorbed_into[index]].extend(merge_plan.pop(index))

        removed = set()
        for dest, sources in merge_plan.items():
            for source in sources:
                clusters[dest].merge_Cluster(clusters[source])
                removed.add(source)

        self.CS_set = [
            cluster for index, cluster in enumerate(clusters)
            if index not in removed
        ]
Ejemplo n.º 3
0
 def update_statistics(self):
     """Recompute the cached per-dimension centroid and spread.

     For every index ``1 .. self.D - 1`` the centroid is ``SUM[i] / N`` and
     the spread is ``sqrt(SUMSQ[i] / N - (SUM[i] / N) ** 2)``, i.e. the
     standard deviation, even though it is stored in ``self.variance``.
     Index 0 is left at 0 in both lists.  On completion ``self.stale`` is
     cleared and ``self.centroid`` / ``self.variance`` are replaced.

     Raises ``ZeroDivisionError`` when ``self.N`` is 0.
     """
     count = self.N
     centroid = [0] * self.D
     variance = [0] * self.D
     for i in range(1, self.D):
         mean = self.SUM[i] / count
         centroid[i] = mean
         # Floating-point rounding can push this difference a hair below
         # zero (e.g. when all values in a dimension are equal), which
         # would make math.sqrt raise ValueError; clamp it at zero.
         squared_spread = (self.SUMSQ[i] / count) - mean**2
         variance[i] = math.sqrt(max(squared_spread, 0.0))
     self.stale = False
     self.centroid, self.variance = centroid, variance
Ejemplo n.º 4
0
def find_nearest_DS(DS_set, alpha, point):
    """Return the label of the closest cluster in ``DS_set``, or -1.

    ``DS_set`` is a mapping from label to cluster.  A cluster qualifies
    only when its ``mahalanobis_distance(point)`` is strictly below
    ``alpha * sqrt(D - 1)``; among qualifying clusters the one with the
    smallest distance wins (the first one encountered on ties).  Returns
    -1 when ``DS_set`` is empty or no cluster qualifies.
    """
    if len(DS_set) == 0:
        return -1
    # Take D from any member instead of requiring a cluster labelled 0;
    # assumes all clusters share the same D.
    reference = next(iter(DS_set.values()))
    threshold = alpha * math.sqrt(reference.D - 1)
    nearest_label = -1
    nearest_distance = float("inf")
    for label, cluster in DS_set.items():
        dist = cluster.mahalanobis_distance(point)
        if dist < nearest_distance and dist < threshold:
            nearest_distance = dist
            nearest_label = label
    return nearest_label
Ejemplo n.º 5
0
def find_nearest_CS(CS_set, alpha, point):
    """Return the index of the closest cluster in the list ``CS_set``, or -1.

    A cluster qualifies only when its ``mahalanobis_distance(point)`` is
    strictly below ``alpha * sqrt(D - 1)``, with ``D`` taken from the first
    cluster.  The qualifying cluster with the smallest distance wins (the
    earliest index on ties).  Returns -1 for an empty list or when nothing
    qualifies.
    """
    if len(CS_set) == 0:
        return -1
    cutoff = alpha * math.sqrt(CS_set[0].D - 1)
    best_index, best_distance = -1, float("inf")
    for position, cluster in enumerate(CS_set):
        candidate = cluster.mahalanobis_distance(point)
        if candidate < cutoff and candidate < best_distance:
            best_index, best_distance = position, candidate
    return best_index
Ejemplo n.º 6
0
 def euclidean_distance(self, a, b):
     """Return the Euclidean distance between ``a`` and ``b``.

     Element 0 of each sequence is skipped; the remaining elements are
     paired positionally, stopping at the shorter sequence.
     """
     squared_gaps = [(x - y) ** 2 for x, y in zip(a[1:], b[1:])]
     total = sum(squared_gaps)
     return math.sqrt(total)