def main():
    """Ask the user for a clustering method and a dataset, then run it.

    Recognised method codes: "k" for k-means, "s" for spectral clustering.
    Any other code is silently ignored.
    """
    method = getClusteringMethod()
    dataset = getDataset()
    if method == "k":
        kmeans.main(dataset)
    elif method == "s":
        spectralClustering.main(dataset)
def main(path): d = [10, 50, 100, 200, 300, 400, 500, 1000, 2000] l = [] for x in d: print 'Starting kmeans with ' + str(x) +' centroids...' kmeans.main(path, 400) print 'Finished kmeans, proceding with centroidFinder...' centroidFinderP.main(path, x) print 'Finished CF, running kfold10...' a = kfold.main(path, 10) l.append(a) print l
def main(k, n, Random): """ The main function of the code :param k: int , amount of centers :param n: int, number of points :param Random: boolean, the Random variable described in the assignment """ # import is done here inorder to prevent calling the kmeans module (from tasks.py) before build. import kmeans if not Random and (k is None or n is None): Error('Unable to determine k, n', __file__) if (k is not None) and (n is not None): k, n = int(k), int(n) if k >= n: Error('k >= n', __file__) print_max_capacity() d = random.randint(2, 3) n, K, X, centers = create_data(n, d, k, Random) # this returned K is the one that was used in the data generation print(f'The k that was used (K) to create the data is {K}') print(f'The n that was used to create the data is {n}') print(f'The d that was used to create the data is {d}') files.build_data_text_file(X, centers) """ Both The Spectral and the Kmeans algorithms end up sending vectors to the kmeans module. The only difference is which vectors. Here those vectors (spectral_observations & kmeans_observations) are being computed. Note: The if else section here is needed because --> if Random=True, k should be computed by the eigengap heuristic, and that k will be of use in the following code. --> else, the actual k is needed when computing U in the spectral module. """ if Random: spectral_observations, k = spectral.main(X) else: spectral_observations, _ = spectral.main(X, k) # else => k = K kmeans_observations = X spectral_res = kmeans.main(spectral_observations, k, n, k) kmeans_res = kmeans.main(kmeans_observations, k, n, d) files.build_clusters_text_file(k, spectral_res, kmeans_res) files.build_clusters_pdf_file(K, k, n, d, spectral_res, kmeans_res, centers)
def main(argv): argc = len(argv) if (argc is not 5): print '\nWarning: incorrect argument(s) to bagOfWords.py. Expected arguments:\n\n' \ '\t- train_dir (required)\t: directory containing training *.fts files.\n' \ '\t- test_dir (required)\t: directory containing testing *.fts files.\n' \ '\t- N_fts (required)\t: number of *.fts files to use for clustering.\n'\ '\t- K (required)\t\t: number of clusters in K-Means clustering\n' sys.exit(1) train_dir = argv[1] test_dir = argv[2] n_fts = int(argv[3]) n_clusters = int(argv[4]) # 1. Combine files containing SIFT features of individual images into a single file for clustering. print '\n Preparing for clustering...\n' concatFile = concatenateFeatures.main([train_dir, n_fts]) # 2. Cluster features into K clusters with k-means. print '\n Clustering...\n' centersFile = kmeans.main([concatFile, n_clusters]) # 3. Generate histograms for each training file using the cluster centers. print '\n Generating histograms for training files...\n' generateHistogram.main([centersFile, train_dir]) # 4. Generate histograms for each testing file using the cluster centers. print '\n Generating histograms for testing files...\n' generateHistogram.main([centersFile, test_dir]) print '\n Bag of Words done!\n'
def handleButton(self):
    """Classify the query text and display both results in a secondary window."""
    tfidf_result = tfidfclassification.blah(self.lineEdit.text())
    kmeans_result = kmeans.main(self.lineEdit.text())
    # Hand the results to the secondary window, then swap windows.
    result_window = secWin(self)
    result_window.wind1 = tfidf_result
    result_window.wind2 = kmeans_result
    self.hide()
    result_window.show()
def upload_numeric_cluster():
    """Accept an uploaded config/CSV file and kick off the matching clustering job.

    POST: saves the file; a recognised *.config name also triggers the
    corresponding clustering run, then redirects back to the form.
    GET (or a disallowed file): renders the upload page.
    """
    if request.method == 'POST':
        # The form must actually carry a file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        upload = request.files['file']
        # Browsers submit an empty filename when no file was selected.
        if upload.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if upload and allowed_file(upload.filename):
            filename = secure_filename(upload.filename)
            if filename == "kmean-config.config":
                upload.save(
                    os.path.join(
                        '/home/nexxus/Python/Final_Year/static/files/kmeans',
                        filename))
                CONFIG_kMEAN = '/home/nexxus/Python/Final_Year/static/files/kmeans/' + filename
                from kmeans import main
                main(CONFIG_kMEAN)
            elif filename == "meanshift-config.config":
                upload.save(
                    os.path.join(
                        '/home/nexxus/Python/Final_Year/static/files/meanshift',
                        filename))
                config_meanshift = '/home/nexxus/Python/Final_Year/static/files/meanshift/' + filename
                from Meanshift import main
                main(config_meanshift)
            else:
                # Anything else is treated as a plain data CSV.
                upload.save(
                    os.path.join('/home/nexxus/Python/Final_Year/static/csv',
                                 filename))
            return redirect(request.url)
    return render_template('numericclustering.html')
def main(args): # DP = open("datapoints.txt","w").close() SP = open("stat_plot.txt","w").close() ST = open("statistics.txt","w").close() num_points=0 pointx = '' while num_points <500000: num_iter = 0 num_points +=50000 num_generated = 50000 # Create num_points random Points in n-dimensional space # num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.1), 2,1, 900, 1,900 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) # num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.2), 2,50, 350, 50,500 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) # num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.2), 2,410, 600,10,550 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) #num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.2), 2,600, 890, 600,900 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) # num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.1), 2,150, 300,650,900 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) # num_gen, coords, lowerx, upperx, lowery, uppery = int(num_generated*.2), 2,650, 880,50,470 # pointx = makeRandomPoint(num_gen, lowerx, upperx, lowery, uppery) while num_iter <1: num_iter +=1 filename = "centroid2d_" + `num_iter` +".txt" shutil.copy(filename,"centroidinput.txt") datafilename = "datapoints" + `num_points` + ".txt" #shutil.copy("datapoints1000.txt","datapoints.txt") # shutil.copy("datapoints.txt", datafilename) shutil.copy(datafilename,"datapoints.txt") kmeans.main(num_points) # final clustering of the datapoints # os.system("python mapperfinal.py") #Renaming and finalizing of files newfilename = "statistics_4n_" + `num_points` +"_"+ `num_iter` +".txt" os.rename("statistics.txt", newfilename) # newname = "cluster1_" + `num_points` +".txt" # os.rename("cluster1.txt", newname) # newname = "cluster2_" + `num_points` +".txt" # os.rename("cluster2.txt", newname) # newname 
= "cluster3_" + `num_points` +".txt" # os.rename("cluster3.txt", newname) # newname = "cluster4_" + `num_points` +".txt" # os.rename("cluster4.txt", newname) if os.path.exists("cluster5.txt"): newname = "cluster5_" + `num_points` +".txt" os.rename("cluster5.txt", newname) # newname = "cluster6_" + `num_points` +".txt" # os.rename("cluster6.txt", newname) #plot.main() # DP = open("datapoints.txt","w").close() # SP = open("stat_plot3d.txt","w").close() num_points=0 pointx = '' while num_points <500000: num_iter = 0 num_points +=50000 num_generated = 50000 # # Create num_points random Points in n-dimensional space # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.1), 2,1, 900, 1,900,1,900 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.2), 2,50, 250, 50,300,50,400 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.2), 2,510, 700,0,250,0,200 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.2), 2,0, 200,650,900,600,750 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.2), 2,650, 880,50,470,800,900 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) # num_gen, coords, lowerx, upperx, lowery, uppery,lower,upper = int(num_generated*.1), 2,800, 900,700,900,700,900 # pointx = makeRandom3dPoint(num_gen, lowerx, upperx, lowery, uppery,lower,upper) while num_iter <1: num_iter +=1 print num_iter filename = "centroid3d_" + `num_iter` +".txt" shutil.copy(filename,"centroidinput.txt") datafilename = "datapoints3d" + `num_points` + ".txt" 
#shutil.copy("datapoints1000.txt","datapoints.txt") # shutil.copy("datapoints.txt", datafilename) shutil.copy(datafilename, "datapoints.txt") kmeans3d.main(num_points) # final clustering of the datapoints # os.system("python mapperfinal3d.py") #Renaming and finalizing of files newfilename = "statistics3d_4n_" + `num_points` +"_"+ `num_iter` +".txt" os.rename("statistics3d.txt", newfilename)
# INNER join Peaqock.dbo.Clients ON IdClient=IdPersonne)""",con) # #Id_Personne_KYC = list(set(clustered.IdPersonne)) #data = pd.DataFrame() #for ix in Id_Personne_KYC : # if data.empty : # data = df[df['IdPersonne'] == ix] # else : # data = pd.concat([df[df['IdPersonne'] == ix],data]) #df = data df = pd.read_csv('./clients_questionnaire.csv') df = df.drop('Unnamed: 0', axis=1) clustered = main() clustered.index = clustered.IdPersonne print('Importation des données terminée.', int(time.time() - timee)) ##Suppression des variables inutiles ou vides #list_to_drop = ['Unnamed: 0','IdPersonne','IdCompteEspece','IdOrdreEx','CodeRejet','PrixStop','Marge','QteMinimale','QteDevoilee','NamedOrder','StatusOrdre'] #df = df.drop(list_to_drop,axis = 1) #Ajout de features df['TypeValeur'] = df['IdTypeValeur'].replace(all_id_TypeValeurs, all_label_TypeValeurs) #%%Extraction des montants Brut pour chaque annee pour chaque client # Base de données : dbase_montant_brut
import os

import kmeans

if __name__ == '__main__':
    # Per-run results written by kmeans.main(), and the aggregate this script
    # appends to (paths are loop-invariant, so bind them once).
    filenames_eachrun = 'C:/Users/faica/OneDrive/Documents/dev/clustering_od/results.csv'
    results_file = 'C:/Users/faica/OneDrive/Documents/dev/clustering_od/results_agg.csv'
    # Run the clustering 30 times, tagging every data row with its run index.
    for i in range(30):
        kmeans.main()
        print()
        print('Run :' + str(i))
        with open(results_file, "a") as f_end:
            with open(filenames_eachrun, "rt") as f:
                # First line of each run's file is the column header.
                firstline = f.readline().rstrip('\n')
                # Bug fix: the header used to be re-written on every run,
                # leaving 30 header rows in the aggregate. Write it only once.
                if i == 0:
                    f_end.write(firstline + ',run' + '\n')
                for line in f:
                    f_end.write(line.rstrip('\n') + ',' + str(i) + '\n')