def buildProductClusters():
    """Cluster the products and return the raw ``cl.kMeans`` output.

    Stores the transpose of ``c.matrix`` in the module-level ``transpose``
    variable, re-initialises ``cl`` with that matrix and ``p.products``, and
    runs k-means asking for one cluster per eight products, plus one.

    Returns:
        Whatever ``cl.kMeans`` returns, unchanged.
    """
    global transpose
    transpose = c.matrix.transpose()
    cl.__init__(transpose, p.products)
    # Floor division keeps the cluster count an integer on Python 3 too;
    # plain `/` would hand kMeans a float there.
    catNum = len(p.products) // 8 + 1
    return cl.kMeans(catNum, 8)
def main():
    """Interactively run k-means on one of the bundled data sets.

    Asks which data set to use, how many clusters and how many iterations,
    fits ``clustering.kMeans`` on the data read from ``bases/<name>.txt`` and
    writes the resulting partition to
    ``generated/<name>kMeansC<clusters>_I<iterations>.clu``.

    An answer other than "1", "2" or "3" to the first prompt prints a message
    and returns without doing any work (previously this crashed because the
    data set name was never assigned).
    """
    auxiliar = input(
        "Qual o arquivo que deseja inserir?\n 1 = c2ds1-2sp\n 2 = c2ds3-2g\n 3 = monkey\n"
    )
    baseNames = {"1": "c2ds1-2sp", "2": "c2ds3-2g", "3": "monkey"}
    p = baseNames.get(auxiliar)
    if p is None:
        print("Opcao invalida.")
        return

    data = readDataSet("bases/" + p + ".txt")
    nClusters = int(input("Quantos clusters voce deseja utilizar?\n"))
    nIterations = int(input("Quantas iteracoes voce deseja realizar?\n"))

    # Set up k-means, then iterate it.
    result = clustering.kMeans(nClusters, nIterations, data, p)
    result.fit()
    # Uncomment to plot the result: plots.kMeansPlot(result)

    pat = "generated/" + p + "kMeans" + \
        "C" + str(nClusters) + "_" + "I" + str(nIterations) + '.clu'
    if auxiliar == '3':
        # Only the monkey data set gets its cluster labels shifted by one
        # before writing -- presumably its reference labels are 1-based;
        # TODO confirm.
        for point in result.data:
            point.cluster += 1
    printPartition(pat, result.data)
def createSubcluster(indexMap, subMatrix, aMap):
    """Run k-means on ``subMatrix`` and bundle the results into one list.

    ``cl`` is re-initialised with ``subMatrix``, ``c.customers`` and ``aMap``
    and then asked for ``cl.kMeans(25, 8)``.

    Returns:
        A list laid out as::

            [0] element 0 of the kMeans result (named ``clusters`` here)
            [1] element 1 of the kMeans result (named ``centroids`` here)
            [2] ``cl.clusterMap`` as it stands after the kMeans call
            [3] ``indexMap`` (passed through)
            [4] ``s.averageSilhouettes(clusters, subMatrix)``
            [5] ``aMap`` (passed through)
    """
    cl.__init__(subMatrix, c.customers, aMap)
    kMeansOutput = cl.kMeans(25, 8)
    foundClusters = kMeansOutput[0]
    foundCentroids = kMeansOutput[1]
    # The literal is evaluated left to right, so cl.clusterMap is read
    # before averageSilhouettes runs, exactly as in the append sequence.
    return [
        foundClusters,
        foundCentroids,
        cl.clusterMap,
        indexMap,
        s.averageSilhouettes(foundClusters, subMatrix),
        aMap,
    ]
def run(names):
    """Cluster products, then customers per product cluster, and recommend.

    Steps, as performed below:

    1. k-means over the transposed ``c.matrix`` groups the products; the
       groups are then passed through ``n.normalizeProdClusters``.
    2. ``st.subMatrices`` splits the data per product cluster and
       ``st.createSubcluster`` is run on every sub-matrix, as well as once
       on the full ``c.matrix`` (the "total" cluster).
    3. Sub-clusters whose element 4 is at least the total cluster's element 4
       are kept as "power" clusters (element 4 is presumably the average
       silhouette score -- TODO confirm against ``st.createSubcluster``).

    Side effects:
        Sets the module-level ``products`` and ``transpose`` variables and
        re-initialises ``cl``.

    Args:
        names: Passed through to ``r.buildRecommendations`` and stored as the
            first element of the result.

    Returns:
        The string ``'again'`` when no sub-cluster qualifies as a power
        cluster. Otherwise a list::

            [0] names
            [1] c.customersMap
            [2] normalised product clusters
            [3] 'unfiltered results: <total cluster element 4>'
            [4] 'filtered average: <mean of the power clusters' element 4>'
            [5] recommendations built from the power clusters + total cluster
            [6] power clusters (with the total cluster appended last)
            [7] all sub-clusters
    """
    global products
    global transpose
    products = p.products
    results = [names, c.customersMap]

    transpose = c.matrix.transpose()
    cl.__init__(transpose, p.products)
    # Floor division keeps the cluster count an integer on Python 3 too;
    # plain `/` would hand kMeans a float there.
    catNum = len(p.products) // 8 + 1
    outputs = cl.kMeans(catNum, 8)
    prodClusters = outputs[0]
    centroids = outputs[1]

    inputs = st.subMatrices(prodClusters)
    prodClusters = n.normalizeProdClusters(
        prodClusters, centroids, inputs[0], inputs[1], 0.2, 0.4)
    results.append(prodClusters)

    # The sub-matrices are recomputed because normalisation replaced the
    # product clusters they were derived from.
    inputs = st.subMatrices(prodClusters)
    subMats = inputs[0]
    maps = inputs[1]
    indexMap = inputs[2]

    subClusters = []
    for i in range(len(subMats)):
        subCluster = st.createSubcluster(indexMap[i], subMats[i], maps[i])
        subCluster.append(r.buildRecommendations(names, [subCluster]))
        subClusters.append(subCluster)

    totCluster = st.createSubcluster(p.products, c.matrix, p.productsMap)
    totCluster.append(r.buildRecommendations(names, [totCluster]))
    results.append('unfiltered results: ' + str(totCluster[4]))

    # Keep only the sub-clusters that score at least as well as the
    # clustering of the whole matrix.
    powerClusters = [sub for sub in subClusters if sub[4] >= totCluster[4]]
    if not powerClusters:
        return 'again'
    powerSil = [sub[4] for sub in powerClusters]
    results.append('filtered average: ' + str(sum(powerSil) / len(powerSil)))

    powerClusters.append(totCluster)
    results.append(r.buildRecommendations(names, powerClusters))
    results.append(powerClusters)
    results.append(subClusters)
    return results
def main():
    """Menu loop for choosing a data file and running a clustering method.

    Options:
        1 -- pick one of the three known data files and load it with
             ``readDataSet``.
        2 -- k-means: asks for cluster count and iterations, writes the
             partition under ``kMeans/``.
        3 -- single-link: asks for a minimum and maximum cluster count and
             writes and plots one partition per entry returned by
             ``cl.singleLink``, labelling them ``kMax``, ``kMax - 1``, ...
        4 -- average-link: asks for a minimum and maximum cluster count and
             runs ``cl.avgLink``.
        0 -- leave the loop.

    Options 2-4 are refused with a message until a file has been selected.
    An unknown file number is now rejected with a message instead of calling
    ``readDataSet`` with an empty or stale file name.
    """
    arquivos = {1: 'c2ds1-2sp.txt', 2: 'c2ds3-2g.txt', 3: 'monkey.txt'}
    dataSet = []
    nomeArquivo = ''
    while True:
        opcao = int(input(
            'Entre com a opcao:\n'
            '[1] - Escolher o arquivo de dados \n'
            '[2] - Aplicar kMeans\n'
            '[3] - Aplicar Single-Link\n'
            '[4] - Aplicar Avg-Link\n'
            '[0] - Sair\n'))
        if opcao == 1:
            oparquivo = int(input('[1] - c2ds1-2sp.txt\n[2] - c2ds3-2g.txt\n[3] - monkey.txt\n'))
            if oparquivo not in arquivos:
                # Keep the previous selection (if any) untouched.
                print('Opcao invalida.')
                continue
            nomeArquivo = arquivos[oparquivo]
            dataSet = readDataSet(nomeArquivo)
        elif opcao == 0:
            break
        elif nomeArquivo == '':
            print('Nenhum arquivo selecionado.')
        elif opcao == 2:
            name = nomeArquivo
            nClusters = int(input('Digite a quantidade de clusters: '))
            times = int(input('Digite a quantidade de iteracoes: '))
            partition = cl.kMeans(dataSet, nClusters, times)
            rmkPartition = outPartition(dataSet, partition)
            complete = 'k' + str(nClusters) + 't' + str(times) + '.clu'
            printPartition('kMeans/' + name.replace('.txt', complete), rmkPartition)
        elif opcao == 3:
            name = nomeArquivo
            kMin = int(input('Digite a quantidade minima de clusters: '))
            kMax = int(input('Digite a quantidade maxima de clusters: '))
            partition = cl.singleLink(dataSet, kMin, kMax)
            # Partitions are labelled from kMax downwards, one per entry.
            k = kMax
            for each in partition:
                rmkPartition = outPartition(dataSet, each)
                complete = 'k' + str(k) + '.clu'
                printPartition('singleLink/' + name.replace('.txt', complete), rmkPartition)
                pl.plotSingleLinkMonkey(dataSet, rmkPartition, k)
                k -= 1
        elif opcao == 4:
            kMin = int(input('Digite a quantidade minima de clusters: '))
            kMax = int(input('Digite a quantidade maxima de clusters: '))
            # NOTE(review): the result is computed but never written or
            # plotted here -- confirm whether output was intended.
            partition = cl.avgLink(dataSet, kMin, kMax)
def createClusterHelpers(indexMap, subMatrix, aMap):
    """Run k-means on ``subMatrix`` and collect the helper data in a list.

    ``cl`` is re-initialised with ``subMatrix``, ``c.customers`` and ``aMap``
    and then asked for ``cl.kMeans(25, 8)``.

    Returns:
        A list laid out as::

            [0] element 0 of the kMeans result (named ``clusters`` here)
            [1] element 1 of the kMeans result (named ``centroids`` here)
            [2] ``cl.clusterMap`` as it stands after the kMeans call
            [3] ``indexMap`` (passed through)
            [4] ``s.silhouettesList``, read after the silhouette call
            [5] ``s.averageSilhouettes(clusters, subMatrix, centroids)``
    """
    cl.__init__(subMatrix, c.customers, aMap)
    kMeansOutput = cl.kMeans(25, 8)
    foundClusters, foundCentroids = kMeansOutput[0], kMeansOutput[1]

    helpers = [foundClusters, foundCentroids, cl.clusterMap, indexMap]

    # averageSilhouettes must run before s.silhouettesList is read
    # (presumably the call populates it -- TODO confirm).
    meanSilhouette = s.averageSilhouettes(foundClusters, subMatrix, foundCentroids)
    helpers.append(s.silhouettesList)
    helpers.append(meanSilhouette)
    return helpers
def dissolve(clusts, centroids, mats, maps, i):
    """Replace cluster ``i`` with the smaller clusters k-means splits it into.

    Entry ``i`` is re-clustered via ``cl.kMeans`` on the transpose of its
    matrix, removed from all four lists, and every resulting cluster is
    appended to the end together with its centroid and a matrix/map pair
    filled in by ``st.redoMatrix``.

    Args:
        clusts: List of clusters; modified in place.
        centroids: List of centroids parallel to ``clusts``; modified in place.
        mats: List of matrices parallel to ``clusts``; modified in place.
        maps: List of maps parallel to ``clusts``; modified in place.
        i: Position of the cluster to dissolve.

    Returns:
        None. All results are delivered through the mutated lists.
    """
    trans = mats[i].transpose()
    cl.__init__(trans, clusts[i], maps[i])
    # Floor division keeps the cluster count an integer on Python 3 too;
    # plain `/` would hand kMeans a float there.
    num = len(clusts[i]) // 8 + 1
    results = cl.kMeans(num, 20)
    pClusts = results[0]
    pCents = results[1]

    # Drop the dissolved entry from every parallel list.
    for parallel in (clusts, centroids, mats, maps):
        parallel.pop(i)

    for j in range(len(pClusts)):
        clusts.append(pClusts[j])
        centroids.append(pCents[j])
        newMat = []
        newMap = {}
        # redoMatrix receives the index of the cluster just appended and
        # fills newMat / newMap for it.
        st.redoMatrix(clusts, len(clusts) - 1, newMat, newMap)
        mats.append(newMat)
        maps.append(newMap)
def dissolve(clusts, centroids, mats, maps, i):
    """Split cluster ``i`` with k-means and splice the pieces back in.

    The matrix of cluster ``i`` is transposed and clustered by ``cl.kMeans``.
    The original entry is then removed from ``clusts``, ``centroids``,
    ``mats`` and ``maps``, and each new cluster is appended along with its
    centroid and a fresh matrix/map pair built by ``st.redoMatrix``.

    Args:
        clusts: List of clusters; modified in place.
        centroids: List of centroids parallel to ``clusts``; modified in place.
        mats: List of matrices parallel to ``clusts``; modified in place.
        maps: List of maps parallel to ``clusts``; modified in place.
        i: Position of the cluster to dissolve.

    Returns:
        None. The four lists are updated in place.
    """
    trans = mats[i].transpose()
    cl.__init__(trans, clusts[i], maps[i])
    # Use floor division: on Python 3 `/` yields a float, which is not a
    # valid cluster count. On Python 2 the result is unchanged.
    num = len(clusts[i]) // 8 + 1
    results = cl.kMeans(num, 20)
    pClusts = results[0]
    pCents = results[1]

    clusts.pop(i)
    centroids.pop(i)
    mats.pop(i)
    maps.pop(i)

    for j in range(len(pClusts)):
        clusts.append(pClusts[j])
        centroids.append(pCents[j])
        newMat = []
        newMap = {}
        # The freshly appended cluster sits at the last index of clusts.
        st.redoMatrix(clusts, len(clusts) - 1, newMat, newMap)
        mats.append(newMat)
        maps.append(newMap)
def run(names):
    """Cluster products and customers and assemble everything for reporting.

    Steps, as performed below:

    1. k-means over the transposed ``c.matrix`` groups the products; the
       groups are then passed through ``n.normalizeProdClusters``.
    2. ``st.subMatrices`` splits the data per product cluster and
       ``st.createSubclustersHelpers`` is run on every sub-matrix, as well as
       once on the full ``c.matrix`` (the customer-level helpers).
    3. Sub-cluster helpers whose element 5 is at least the customer-level
       element 5 become "power" helpers; the rest stay "sub" helpers
       (element 5 is presumably the average silhouette score -- TODO confirm
       against ``st.createSubclustersHelpers``).

    Side effects:
        Sets the module-level ``products`` and ``transpose`` variables and
        re-initialises ``cl``.

    Args:
        names: Passed through to ``r.buildRecommendations`` and stored as the
            first element of the result.

    Returns:
        The string ``'again'`` when no sub-cluster qualifies as a power
        cluster. Otherwise a list::

            [0]  names
            [1]  c.customersMap
            [2]  normalised product clusters
            [3]  p.productsMap
            [4]  products
            [5]  recommendations built from the power helpers
            [6]  one-element list holding the customer-level helpers
            [7]  sub helpers that did not qualify as power helpers
            [8]  power helpers
            [9]  c.matrix
            [10] st.createClusterMap(product clusters)
            [11] per product cluster, ``['power', k]`` or ``['sub', k]`` where
                 ``k`` is its position inside list [8] or [7] respectively
    """
    global products
    global transpose
    products = p.products
    # indexes 0 and 1
    results = [names, c.customersMap]

    transpose = c.matrix.transpose()
    cl.__init__(transpose, p.products)
    # Floor division keeps the cluster count an integer on Python 3 too;
    # plain `/` would hand kMeans a float there.
    catNum = len(p.products) // 8 + 1
    outputs = cl.kMeans(catNum, 8)
    productClusters = outputs[0]
    centroids = outputs[1]

    inputs = st.subMatrices(productClusters)
    productClusters = n.normalizeProdClusters(
        productClusters, centroids, inputs[0], inputs[1], 0.2, 0.4)
    # indexes 2, 3 and 4
    results.append(productClusters)
    results.append(p.productsMap)
    results.append(products)

    # The sub-matrices are recomputed because normalisation replaced the
    # product clusters they were derived from.
    inputs = st.subMatrices(productClusters)
    subMats = inputs[0]
    maps = inputs[1]
    indexMap = inputs[2]

    subClustersHelpers = []
    for i in range(len(subMats)):
        subCluster = st.createSubclustersHelpers(indexMap[i], subMats[i], maps[i])
        subCluster.append(r.buildRecommendations(names, [subCluster]))
        subClustersHelpers.append(subCluster)

    customerClustersHelpers = st.createSubclustersHelpers(
        p.products, c.matrix, p.productsMap)
    customerClustersHelpers.append(
        r.buildRecommendations(names, [customerClustersHelpers]))

    # Split the helpers into "power" and remaining "sub" helpers in a single
    # pass. The locator records, for each product cluster, which of the two
    # lists it landed in and at which position.
    threshold = customerClustersHelpers[5]
    powerClustersHelpers = []
    remainingSubHelpers = []
    productClusterLocator = []
    for helper in subClustersHelpers:
        if helper[5] >= threshold:
            productClusterLocator.append(['power', len(powerClustersHelpers)])
            powerClustersHelpers.append(helper)
        else:
            productClusterLocator.append(['sub', len(remainingSubHelpers)])
            remainingSubHelpers.append(helper)

    if not powerClustersHelpers:
        return 'again'

    powerRecMatrix = r.buildRecommendations(names, powerClustersHelpers)
    # index 5
    results.append(powerRecMatrix)
    # index 6
    results.append([customerClustersHelpers])
    # index 7
    results.append(remainingSubHelpers)
    # index 8
    results.append(powerClustersHelpers)
    # index 9
    results.append(c.matrix)
    # index 10
    results.append(st.createClusterMap(productClusters))
    # index 11
    results.append(productClusterLocator)
    return results