import numpy as np
import k_medias as km

# Load the iris CSV: columns 0-3 go to `data`, every remaining column to `y`.
reader = np.genfromtxt("files/irisbin.csv", delimiter=',')
data = reader[:, :4]
y = reader[:, 4:]

# Random permutation of the row indices, handed to the clustering routine.
H, W = data.shape
indexs = np.arange(H)
np.random.shuffle(indexs)

# Cluster with k centroids.
# NOTE(review): 0.05 and 500 look like a tolerance and an iteration cap;
# confirm against km.k_medias_tol.
k = 13
centroides, distancia, grupoCentroides = km.k_medias_tol(data, indexs, k, 0.05, 500)

# Per-group, per-column variance. The group mask is applied to `indexs`,
# which in turn selects rows of `data`.
varianzas = np.array([
    data[indexs[grupoCentroides == i], :].var(axis=0)
    for i in range(k)
])
# Single scalar: mean over columns, then mean over groups.
var = varianzas.mean(axis=1).mean()

# Transform the data with km.gauss_k_medias and append the `y` columns on the
# right before writing everything to a CSV named after k.
transfGauss = km.gauss_k_medias(data, centroides, k)
outputGauss = np.concatenate((transfGauss, y), axis=1)
np.savetxt(f"files/iris_k_medias_{k}.csv", outputGauss, delimiter=",")
# `plt` is needed below; import it here so this section does not rely on an
# import statement that only appears later in the file.
from matplotlib import pyplot as plt

reader = np.genfromtxt("files/merval.csv", delimiter=',')

# Build the dataset: cut the loaded values into consecutive, non-overlapping
# chunks of six entries. Trailing entries that do not fill a whole chunk are
# dropped (floor division).
data = np.array([
    reader[i * 6:(i + 1) * 6]
    for i in range(reader.shape[0] // 6)
])
# First five entries of each chunk are stored as inputs, the sixth as `yd`
# (kept as a column vector).
inData = data[:, 0:5]
yd = np.expand_dims(data[:, 5], axis=1)

# Random permutation of the chunk indices, handed to the clustering routine.
H = inData.shape[0]
indexs = np.arange(0, H, 1)
np.random.shuffle(indexs)

# Sweep k over [minK, maxK) and keep every clustering result.
# NOTE(review): the meaning of 0.05, 400 and 200 is defined by
# km.k_medias_tol; confirm against that module.
minK = 15
maxK = 35
kValues = np.arange(minK, maxK)
results = [
    km.k_medias_tol(inData, indexs, k, 0.05, 400, 200)
    for k in range(minK, maxK)
]

# The second element of each result is the value plotted against k; collect
# them into a column vector with one row per k.
distPromV = np.array([res[1] for res in results], dtype=float).reshape(-1, 1)

# Curve of that value versus k: line plus markers, one tick per k.
# No plt.show() is issued here; the figure stays open until a later
# show/save call.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(kValues, distPromV)
ax.scatter(kValues, distPromV)
ax.set_title("Curva k_medias Merval")
ax.set_xticks(kValues)
ax.set_xlabel(" K ")
import k_medias as km
from matplotlib import pyplot as plt
import multiprocessing as mp

# Fixed seed so the shuffle below is reproducible.
np.random.seed(124394140)

# Load the XOR training CSV: columns 0-1 go to `data`, column 2 becomes the
# column vector `y`.
reader = np.genfromtxt("files/XOR_trn.csv", delimiter=',')
data = reader[:, :2]
y = reader[:, 2].reshape(-1, 1)

# Random permutation of the row indices, handed to the clustering routine.
H, W = data.shape
indexs = np.arange(H)
np.random.shuffle(indexs)

# Cluster with k centroids, relying on km.k_medias_tol's default settings.
k = 4
centroides, distProm, grupos = km.k_medias_tol(data, indexs, k)

# One scatter per group (the group mask is applied to `indexs`, which selects
# rows of `data`), plus a triangle marker at that group's centroid.
for i in range(k):
    miembros = indexs[grupos == i]
    plt.scatter(data[miembros, 0], data[miembros, 1])
    centro = centroides[i]
    plt.scatter(centro[0], centro[1], marker="^")
plt.show()

# Transform the data with km.gauss_k_medias and append `y` as the last column.
transfGauss = km.gauss_k_medias(data, centroides, k)
outputGauss = np.concatenate((transfGauss, y), axis=1)
# Export is currently disabled:
# np.savetxt("files/xor_k_medias_" + str(k) + ".csv", outputGauss, delimiter = ",")