def find_centroids(centroids, data, labels, pairwise_distances, zero_point, C, D):
    """Overwrite ``centroids`` with the mean of the points labelled for each one.

    For every index ``k`` in ``range(C)`` the rows of ``data`` whose label is
    ``k`` are summed (all other rows are replaced by ``zero_point``) and
    stored in ``centroids[k]``; the sums are then divided in place by the
    per-label point counts.

    Args:
        centroids: Array updated in place, indexed as ``centroids[k, :]``.
        data: Points to average; presumably one row per point -- confirm
            against the caller.
        labels: Non-negative integer label of each point.
        pairwise_distances: Indexed as ``pairwise_distances[:, k]`` to read
            each point's distance to centroid ``k``.
        zero_point: Substitute used for rows that do not belong to the
            centroid being summed.
        C: Number of centroids.
        D: Not used by this function.

    Returns:
        The sum, over the centroids in ``range(C)``, of the distances from
        each centroid to the points labelled with it.
    """
    # How many points carry each label.
    members_per_centroid = np.bincount(labels, minlength=C)

    total_distance = 0.0
    for k in range(C):
        # True where a point is assigned to centroid k.
        belongs = labels == k
        masked_points = np.where(belongs[..., np.newaxis], data, zero_point)
        centroids[k, :] = np.sum(masked_points, axis=0)
        masked_distances = np.where(belongs, pairwise_distances[:, k], 0.0)
        total_distance += np.sum(masked_distances)

    # Clamp the divisor to at least one: a centroid with no points keeps its
    # (zero) sum instead of triggering a division by zero.
    floor = np.ones((1,), dtype=np.uint64)
    divisors = np.maximum(members_per_centroid, floor)
    centroids /= divisors[:, np.newaxis]
    return total_distance
def cnd(d):
    """Approximate the standard normal cumulative distribution at ``d``.

    Uses a five-term polynomial in ``K = 1 / (1 + 0.2316419 * |d|)`` scaled
    by the normal density; the value for positive ``d`` is obtained by
    reflection (``1 - tail``).

    Args:
        d: Scalar or array of evaluation points.

    Returns:
        The result of ``np.where`` selecting, element by element, the
        reflected value where ``d > 0`` and the tail value elsewhere.
    """
    inv_sqrt_two_pi = 0.39894228040143267793994605993438
    # Polynomial coefficients, highest order first for Horner evaluation.
    coeffs_high_to_low = (
        1.330274429,
        -1.821255978,
        1.781477937,
        -0.356563782,
        0.31938153,
    )

    k = 1.0 / (1.0 + 0.2316419 * np.absolute(d))

    # Horner scheme: a1 + k * (a2 + k * (a3 + k * (a4 + k * a5))).
    horner = coeffs_high_to_low[0]
    for coeff in coeffs_high_to_low[1:]:
        horner = coeff + k * horner

    density = inv_sqrt_two_pi * np.exp(-0.5 * d * d)
    tail = density * (k * horner)
    return np.where(d > 0, 1.0 - tail, tail)
def test():
    """Check three-argument ``lg.where`` against ``np.where`` on a 2x2 input."""
    # Disabled cases (single-argument where and extract), kept for reference:
    # np.random.seed(42)
    # anp = np.array([1, 54, 4 , 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0])
    # a = lg.array(anp)
    # assert(lg.array_equal(np.where(anp), lg.where(a)))
    # cnp = np.array([1, 54, 4 , 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0, 1]).reshape((6,3))  # noqa E501
    # c = lg.array(cnp)
    # bnp = np.random.randn(6,3)
    # b = lg.array(bnp)
    # assert(lg.array_equal(lg.extract(c, b), np.extract(cnp, bnp)))

    cond_np = np.array([[True, False], [True, True]])
    if_true_np = np.array([[1, 2], [3, 4]])
    if_false_np = np.array([[9, 8], [7, 6]])

    cond = lg.array(cond_np)
    if_true = lg.array(if_true_np)
    if_false = lg.array(if_false_np)

    expected = np.where(cond_np, if_true_np, if_false_np)
    actual = lg.where(cond, if_true, if_false)
    assert np.array_equal(expected, actual)
def test():
    """Compare single-argument ``lg.where`` with ``np.where`` on argmin labels.

    Four rows are drawn from a random data set, the norm of every row's
    offset from each of them is computed with both libraries, and the
    positions whose nearest row is the first one must agree.
    """
    np.random.seed(50)
    num_centroids = 4

    points_np = np.random.randn(2000000, 3)
    points = lg.array(points_np)

    # Pick the centroid rows without replacement.
    chosen_np = np.random.choice(lg.arange(len(points)), num_centroids, False)
    chosen = lg.array(chosen_np)

    centers = points[chosen]
    centers_np = points_np[chosen_np]

    # One row of norms per centroid.
    dists = lg.zeros((num_centroids, len(points)))
    dists_np = np.zeros((num_centroids, len(points_np)))
    for k in range(num_centroids):
        dists[k] = lg.linalg.norm(points - centers[k], axis=1)
        dists_np[k] = np.linalg.norm(points_np - centers_np[k], axis=1)

    nearest = lg.argmin(dists, axis=0)
    nearest_np = np.argmin(dists_np, axis=0)
    assert lg.array_equal(lg.where(nearest == 0), np.where(nearest_np == 0))
def test():
    """Exercise integer-array, broadcast, boolean-mask and tuple indexing on ``lg`` arrays."""
    # Paired row/column index lists pick one element per pair.
    grid = lg.array([[1, 2], [3, 4], [5, 6]])
    assert lg.array_equal(grid[[0, 1, 2], [0, 1, 0]], [1, 4, 5])

    # A column of row indices broadcast against column indices picks the
    # four corner elements.
    grid = lg.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
    row_idx = lg.array([0, 3])
    col_idx = lg.array([0, 2])
    assert lg.array_equal(
        grid[row_idx[:, np.newaxis], col_idx], [[0, 2], [9, 11]]
    )

    # Boolean mask applied to a complex-valued array.
    complex_vals = lg.array([[-1.2 + 0.5j, 1.2 - 2j], [-2.2 + 3.5j, 4.2 - 6.2j]])
    diag_mask = lg.array([[True, False], [False, True]])
    assert lg.array_equal(complex_vals[diag_mask], [-1.2 + 0.5j, 4.2 - 6.2j])

    # Mask produced by a comparison, checked against the NumPy result.
    cube_np = np.array([[[2, 1], [3, 2]], [[2, 4], [4, 1]]])
    cube = lg.array(cube_np)
    below_three_np = cube_np < 3
    below_three = cube < 3
    assert lg.array_equal(cube_np[below_three_np], cube[below_three])

    # Index with the tuple returned by nonzero().
    flags = lg.array(
        [[[True, True], [False, True]], [[True, False], [False, True]]]
    )
    hits = lg.nonzero(flags)
    assert lg.array_equal(cube[hits], lg.array([2, 1, 2, 2, 1]))

    # Integer-array indexing on a random 3-D array.
    np.random.seed(42)
    rand_np = np.random.randn(10, 10, 4)
    rand = lg.array(rand_np)
    first_idx_np = np.array([3, 4, 6])
    second_idx_np = np.array([1, 4, 5])
    first_idx = lg.array(first_idx_np)
    second_idx = lg.array(second_idx_np)
    assert lg.array_equal(rand[first_idx], rand_np[first_idx_np])
    # NOTE(review): the NumPy reference below is indexed with the lg index
    # arrays rather than their NumPy counterparts -- confirm this is intended.
    assert lg.array_equal(
        rand[(first_idx, second_idx)], rand_np[(first_idx, second_idx)]
    )

    # where() on an all-zero array yields an empty selection.
    all_zero_np = np.zeros(10, int)
    all_zero = lg.zeros(10, int)
    table_np = np.random.randn(20, 4)
    table = lg.array(table_np)
    assert lg.array_equal(
        table_np[np.where(all_zero_np)], table[lg.where(all_zero)]
    )