Ejemplo n.º 1
0
def main():
    """Prompt for a clustering method and a dataset, then run the
    corresponding clustering module on that dataset."""
    method = getClusteringMethod()
    dataset = getDataset()
    if method == "k":
        kmeans.main(dataset)
    elif method == "s":
        spectralClustering.main(dataset)
Ejemplo n.º 2
0
def main(path):
    """Sweep several centroid counts: for each one, run k-means, then the
    centroid finder, then a 10-fold evaluation, collecting the scores.

    :param path: dataset path forwarded to the clustering modules
    """
    centroid_counts = [10, 50, 100, 200, 300, 400, 500, 1000, 2000]
    scores = []
    for count in centroid_counts:
        print('Starting kmeans with ' + str(count) + ' centroids...')
        # NOTE(review): the original passes the constant 400 here even though
        # the message above reports `count` centroids — confirm whether 400 is
        # an iteration cap or should actually be `count`.
        kmeans.main(path, 400)
        print('Finished kmeans, proceding with centroidFinder...')
        centroidFinderP.main(path, count)
        print('Finished CF, running kfold10...')
        scores.append(kfold.main(path, 10))
    print(scores)
Ejemplo n.º 3
0
def main(k, n, Random):
	"""Entry point: generate data, cluster it with both normalized spectral
	clustering and k-means, and write the result files.

	:param k: int or None, requested number of centers
	:param n: int or None, requested number of points
	:param Random: bool, the Random flag described in the assignment
	"""
	# Deferred import so the kmeans module (built from tasks.py) is only
	# loaded after the build step has produced it.
	import kmeans

	if not Random and (k is None or n is None):
		Error('Unable to determine k, n', __file__)

	if (k is not None) and (n is not None):
		k, n = int(k), int(n)
		if k >= n:
			Error('k >= n', __file__)

	print_max_capacity()

	dim = random.randint(2, 3)
	# create_data also returns the K it actually used for generation, which
	# may differ from the requested k when Random is on.
	n, K, X, centers = create_data(n, dim, k, Random)

	print(f'The k that was used (K) to create the data is {K}')
	print(f'The n that was used to create the data is {n}')
	print(f'The d that was used to create the data is {dim}')

	files.build_data_text_file(X, centers)

	# Both pipelines end by sending observation vectors to the kmeans module;
	# they differ only in which vectors. When Random is on, k is recomputed by
	# the eigengap heuristic inside spectral.main and reused below; otherwise
	# the supplied k is forwarded and the returned value is discarded.
	if Random:
		spectral_observations, k = spectral.main(X)
	else:
		spectral_observations, _ = spectral.main(X, k)  # else => k = K

	spectral_res = kmeans.main(spectral_observations, k, n, k)
	kmeans_res = kmeans.main(X, k, n, dim)

	files.build_clusters_text_file(k, spectral_res, kmeans_res)
	files.build_clusters_pdf_file(K, k, n, dim, spectral_res, kmeans_res, centers)
Ejemplo n.º 4
0
def main(argv):
    """Bag-of-words pipeline: cluster SIFT features from the training set
    with k-means, then build per-file histograms for both the training and
    testing sets.

    :param argv: sys.argv-style list: [script, train_dir, test_dir, N_fts, K]
    """
    argc = len(argv)

    # Original used `argc is not 5` — `is` compares identity, not value, and
    # only worked due to CPython's small-int caching. Use != instead. The
    # Python 2 print statements are converted to function calls as well.
    if argc != 5:
        print('\nWarning: incorrect argument(s) to bagOfWords.py. Expected arguments:\n\n'
              '\t- train_dir (required)\t: directory containing training *.fts files.\n'
              '\t- test_dir (required)\t: directory containing testing *.fts files.\n'
              '\t- N_fts (required)\t: number of *.fts files to use for clustering.\n'
              '\t- K (required)\t\t: number of clusters in K-Means clustering\n')
        sys.exit(1)

    train_dir = argv[1]
    test_dir = argv[2]
    n_fts = int(argv[3])
    n_clusters = int(argv[4])

    # 1. Combine files containing SIFT features of individual images into a
    #    single file for clustering.
    print('\n Preparing for clustering...\n')
    concatFile = concatenateFeatures.main([train_dir, n_fts])

    # 2. Cluster features into K clusters with k-means.
    print('\n Clustering...\n')
    centersFile = kmeans.main([concatFile, n_clusters])

    # 3. Generate histograms for each training file using the cluster centers.
    print('\n Generating histograms for training files...\n')
    generateHistogram.main([centersFile, train_dir])

    # 4. Generate histograms for each testing file using the cluster centers.
    print('\n Generating histograms for testing files...\n')
    generateHistogram.main([centersFile, test_dir])

    print('\n Bag of Words done!\n')
Ejemplo n.º 5
0
def main(argv):
    """Bag-of-words pipeline: cluster SIFT features from the training set
    with k-means, then build per-file histograms for both the training and
    testing sets.

    :param argv: sys.argv-style list: [script, train_dir, test_dir, N_fts, K]
    """
    argc = len(argv)

    # Original used `argc is not 5` — `is` compares identity, not value, and
    # only worked due to CPython's small-int caching. Use != instead. The
    # Python 2 print statements are converted to function calls as well.
    if argc != 5:
        print('\nWarning: incorrect argument(s) to bagOfWords.py. Expected arguments:\n\n'
              '\t- train_dir (required)\t: directory containing training *.fts files.\n'
              '\t- test_dir (required)\t: directory containing testing *.fts files.\n'
              '\t- N_fts (required)\t: number of *.fts files to use for clustering.\n'
              '\t- K (required)\t\t: number of clusters in K-Means clustering\n')
        sys.exit(1)

    train_dir = argv[1]
    test_dir = argv[2]
    n_fts = int(argv[3])
    n_clusters = int(argv[4])

    # 1. Combine files containing SIFT features of individual images into a
    #    single file for clustering.
    print('\n Preparing for clustering...\n')
    concatFile = concatenateFeatures.main([train_dir, n_fts])

    # 2. Cluster features into K clusters with k-means.
    print('\n Clustering...\n')
    centersFile = kmeans.main([concatFile, n_clusters])

    # 3. Generate histograms for each training file using the cluster centers.
    print('\n Generating histograms for training files...\n')
    generateHistogram.main([centersFile, train_dir])

    # 4. Generate histograms for each testing file using the cluster centers.
    print('\n Generating histograms for testing files...\n')
    generateHistogram.main([centersFile, test_dir])

    print('\n Bag of Words done!\n')
Ejemplo n.º 6
0
 def handleButton(self):
     """Run both classifiers on the text in the line edit, then open a
     secondary window showing their results and hide this one."""
     tfidf_result = tfidfclassification.blah(self.lineEdit.text())
     kmeans_result = kmeans.main(self.lineEdit.text())
     child = secWin(self)
     child.wind1 = tfidf_result
     child.wind2 = kmeans_result
     self.hide()
     child.show()
Ejemplo n.º 7
0
def upload_numeric_cluster():
    """Flask view: on POST, accept an uploaded file and dispatch on its name —
    a k-means config, a mean-shift config, or a plain data file — then reload
    the page. On GET (or a disallowed file) just render the page."""
    if request.method != 'POST':
        return render_template('numericclustering.html')

    # The POST must carry a file part with a non-empty filename.
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)
    file = request.files['file']
    if file.filename == '':
        flash('No selected file')
        return redirect(request.url)

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        if filename == "kmean-config.config":
            # Save the k-means config and run the k-means pipeline on it.
            file.save(
                os.path.join(
                    '/home/nexxus/Python/Final_Year/static/files/kmeans',
                    filename))
            config_path = '/home/nexxus/Python/Final_Year/static/files/kmeans/' + filename
            from kmeans import main
            main(config_path)
        elif filename == "meanshift-config.config":
            # Save the mean-shift config and run the mean-shift pipeline.
            file.save(
                os.path.join(
                    '/home/nexxus/Python/Final_Year/static/files/meanshift',
                    filename))
            config_path = '/home/nexxus/Python/Final_Year/static/files/meanshift/' + filename
            from Meanshift import main
            main(config_path)
        else:
            # Any other allowed file is stored as plain CSV data.
            file.save(
                os.path.join('/home/nexxus/Python/Final_Year/static/csv',
                             filename))
        return redirect(request.url)

    return render_template('numericclustering.html')
Ejemplo n.º 8
0
def main(args):
    """Run the 2-D and then the 3-D k-means experiments over dataset sizes
    50k..500k (step 50k), one iteration each, renaming the per-run statistics
    output so successive runs do not overwrite it.

    Fixes over the original: Python 2 backquote-repr (`x`) replaced with
    str(), the Python 2 print statement converted to a call, the mixed
    tab/space indentation (a TabError under Python 3) normalized, and unused
    locals (pointx, num_generated) plus long-dead commented-out data
    generation removed.

    :param args: command-line arguments; unused, kept for interface parity
    """
    # Truncate the stats output files left over from a previous run.
    open("stat_plot.txt", "w").close()
    open("statistics.txt", "w").close()

    # ---- 2-D experiment ----
    num_points = 0
    while num_points < 500000:
        num_points += 50000
        num_iter = 0
        while num_iter < 1:
            num_iter += 1
            # Stage the centroid seeds and data under the fixed filenames the
            # kmeans module reads.
            shutil.copy("centroid2d_" + str(num_iter) + ".txt", "centroidinput.txt")
            shutil.copy("datapoints" + str(num_points) + ".txt", "datapoints.txt")
            kmeans.main(num_points)
            # Tag this run's statistics with the size and iteration number.
            os.rename("statistics.txt",
                      "statistics_4n_" + str(num_points) + "_" + str(num_iter) + ".txt")
            if os.path.exists("cluster5.txt"):
                os.rename("cluster5.txt", "cluster5_" + str(num_points) + ".txt")

    # ---- 3-D experiment ----
    num_points = 0
    while num_points < 500000:
        num_points += 50000
        num_iter = 0
        while num_iter < 1:
            num_iter += 1
            print(num_iter)
            shutil.copy("centroid3d_" + str(num_iter) + ".txt", "centroidinput.txt")
            shutil.copy("datapoints3d" + str(num_points) + ".txt", "datapoints.txt")
            kmeans3d.main(num_points)
            os.rename("statistics3d.txt",
                      "statistics3d_4n_" + str(num_points) + "_" + str(num_iter) + ".txt")
Ejemplo n.º 9
0
#          INNER join Peaqock.dbo.Clients  ON IdClient=IdPersonne)""",con)
#

#Id_Personne_KYC = list(set(clustered.IdPersonne))
#data = pd.DataFrame()
#for ix in Id_Personne_KYC :
#    if data.empty :
#        data = df[df['IdPersonne'] == ix]
#    else :
#        data = pd.concat([df[df['IdPersonne'] == ix],data])
#df = data

# Load the client questionnaire data and drop the leftover CSV index column.
df = pd.read_csv('./clients_questionnaire.csv')
df = df.drop('Unnamed: 0', axis=1)

# `main()` (imported elsewhere in this file) returns the clustered clients;
# re-index by client id for the lookups below.
# NOTE(review): assumes the returned frame has an `IdPersonne` column —
# confirm against the clustering module.
clustered = main()
clustered.index = clustered.IdPersonne

print('Importation des données terminée.', int(time.time() - timee))

## Drop unused or empty variables (disabled)
#list_to_drop = ['Unnamed: 0','IdPersonne','IdCompteEspece','IdOrdreEx','CodeRejet','PrixStop','Marge','QteMinimale','QteDevoilee','NamedOrder','StatusOrdre']
#df = df.drop(list_to_drop,axis = 1)

# Feature engineering: map security-type ids to their labels.
df['TypeValeur'] = df['IdTypeValeur'].replace(all_id_TypeValeurs,
                                              all_label_TypeValeurs)

#%% Extract the gross amounts per year for each client
# Database: dbase_montant_brut
Ejemplo n.º 10
0
import os
import kmeans

if __name__ == '__main__':

    # Fixed paths for this machine: kmeans.main() writes results.csv each
    # run; every run's rows are appended, tagged with the run index, to the
    # aggregate file.
    per_run_path = 'C:/Users/faica/OneDrive/Documents/dev/clustering_od/results.csv'
    aggregate_path = 'C:/Users/faica/OneDrive/Documents/dev/clustering_od/results_agg.csv'

    for run in range(30):
        kmeans.main()
        print()
        print('Run :' + str(run))

        with open(aggregate_path, "a") as agg, open(per_run_path, "rt") as src:
            # First line holds the column names; extend it with a 'run' column.
            header = src.readline().rstrip('\n') + ',run' + '\n'
            agg.write(header)
            for row in src:
                agg.write(row.rstrip('\n') + ',' + str(run) + '\n')