def score(ar):
    """Return the mean silhouette coefficient of a labelled data set.

    Every row of ``ar`` is one sample: the last column holds the sample's
    group id and the remaining columns are its features.  For each sample
    ``i`` the following are computed:

    * ``a(i)`` -- mean distance to the *other* members of its own group,
    * ``b(i)`` -- smallest mean distance to the members of any other group,
    * ``s(i) = (b(i) - a(i)) / max(a(i), b(i))``.

    A sample that is alone in its group, or for which ``a(i)`` and ``b(i)``
    are both zero, gets ``s(i) = 0``.

    Distances come from the module-level ``distance`` helper, which is
    called as ``distance(rows, point)``.
    NOTE(review): it is assumed to return one distance per row -- confirm.

    Args:
        ar: 2-D array, features followed by a trailing group-id column.

    Returns:
        The average of ``s(i)`` over all samples.

    Raises:
        ValueError: if ``ar`` contains fewer than two distinct group ids,
            because ``b(i)`` is undefined in that case.
    """
    m, _ = shape(ar)
    labels = ar[:, -1]
    features = ar[:, :-1]

    groupids = np.unique(labels)
    if len(groupids) < 2:
        raise ValueError(
            "silhouette score needs at least two groups, got %d"
            % len(groupids))

    # Row indices of every group, computed once instead of once per sample.
    group_rows = [np.where(labels == gid)[0] for gid in groupids]

    silhouette = np.zeros(m)
    for i in range(m):
        point = features[i]
        a = None
        b = np.inf
        for gid, rows in zip(groupids, group_rows):
            if gid == labels[i]:
                # a(i): leave the sample itself out so that its zero
                # self-distance does not drag the mean down.
                others = rows[rows != i]
                if others.size:
                    a = np.mean(distance(features[others], point))
            else:
                # b(i): mean distance to each foreign group, keep the
                # smallest one.
                b = min(b, np.mean(distance(features[rows], point)))

        if a is None:
            # Singleton group: s(i) stays 0.
            continue
        denominator = max(a, b)
        if denominator > 0:
            silhouette[i] = (b - a) / denominator

    return silhouette.mean()
def assigngroup(cls, data, k=0, centroid=None):
    """Label every row of ``data`` with the index of its nearest centroid.

    Args:
        cls: object carrying the clustering state (``cls.k`` and
            ``cls.centroid``); both are overwritten when the matching
            argument is supplied.
        data: 2-D array, one sample per row.
        k: number of groups.  A falsy value (the default ``0``) keeps the
            current ``cls.k``.
        centroid: centroids to use, one per row.  ``None`` (the default)
            keeps the current ``cls.centroid``.

    Returns:
        ``data`` with one extra trailing column that holds, for each row,
        the index of the centroid with the smallest ``distance``.
    """
    m, _ = shape(data)

    # ``if centroid:`` is ambiguous for multi-element arrays, so test
    # against None explicitly.
    if centroid is not None:
        cls.centroid = np.asarray(centroid)
    if k:
        cls.k = k

    cdata = np.hstack((data, np.zeros((m, 1))))

    # Size the table from the effective group count, not from the ``k``
    # argument, which is 0 when the caller relies on the stored value.
    scoretable = np.zeros((m, cls.k))
    for c in range(cls.k):
        scoretable[:, c] = distance(cdata[:, :-1], cls.centroid[c, :])

    cdata[:, -1] = scoretable.argmin(axis=1)
    return cdata
def train(cls, data, k=3, itercount=60):
    """Cluster ``data`` into ``k`` groups by iterating k-means updates.

    Each pass assigns every sample to its nearest centroid and then moves
    each centroid to the mean of its samples.  Iteration stops when the
    centroids no longer change or after ``itercount`` passes.  If a pass
    leaves any group empty, the centroids are re-drawn with
    ``get_randomseed`` and the pass is repeated (it still counts towards
    ``itercount``).

    Args:
        cls: object that receives the results (``cls.k``, ``cls.centroid``
            and ``cls.assigneddata``).
        data: 2-D array, one sample per row.
        k: number of groups.
        itercount: maximum number of passes.

    Side effects:
        ``cls.k`` is set to ``k``, ``cls.centroid`` to the final centroids
        and ``cls.assigneddata`` to ``data`` with a trailing column holding
        each row's group index.
    """
    m, n = shape(data)
    cls.k = k

    # Fixed starting centroids for four-feature data.  They are stored as
    # floats: an integer array would silently truncate every mean written
    # into it below.
    seed = np.array([[5, 3, 5, 1], [4, 3, 1, 1], [5, 3, 1, 0]], dtype=float)
    if k <= seed.shape[0] and n == seed.shape[1]:
        cls.centroid = seed[:k].copy()
    else:
        # The fixed seed does not fit this problem size; draw one instead.
        cls.centroid = np.array(get_randomseed(data, (k, n)), dtype=float)

    cdata = np.hstack((data, np.zeros((m, 1))))
    scoretable = np.zeros((m, k))
    old_centroid = None
    count = 0
    while count < itercount and \
            not np.array_equal(old_centroid, cls.centroid):
        old_centroid = cls.centroid.copy()
        count += 1

        for c in range(k):
            scoretable[:, c] = distance(data, cls.centroid[c, :])
        group = scoretable.argmin(axis=1)

        # Every group must own at least one sample, otherwise its mean is
        # undefined: start over from fresh centroids.
        if len(np.unique(group)) != k:
            cls.centroid = np.array(get_randomseed(data, (k, n)),
                                    dtype=float)
            continue

        cdata[:, -1] = group
        for c in range(k):
            cls.centroid[c, :] = cdata[group == c, :-1].mean(axis=0)

    cls.assigneddata = cdata
def regionquery(self, item):
    """Return the indices of the entries of ``self.data`` near ``item``.

    An entry qualifies when ``distance(self.data, item)`` is strictly below
    ``self.eps`` at its position; the indices along the first axis of that
    comparison are returned.
    """
    is_neighbour = distance(self.data, item) < self.eps
    return np.where(is_neighbour)[0]