def __init__(self, number_of_gaussian):
    self.ng = number_of_gaussian
    self.gmm = GMM(number_of_gaussian)
    self.comparator_with_impostor = ComparatorWithImpostor(number_of_gaussian)
    self.comparator = Comparator(number_of_gaussian)
    self.cross_validator = CrossValidation(0.3)
class trainingClass:
    def __init__(self, size, output_wavefile):
        self.Training_info = GMM(size, output_wavefile)

    def Training_feature_Mean(self):
        Mean_training = self.Training_info.GMM_Model_Mean()
        return Mean_training

    def Training_feature_Weight(self):
        Weight_training = self.Training_info.GMM_Model_Weight()
        return Weight_training

    def Training_feature_Covar(self):
        Covar_training = self.Training_info.GMM_Model_Covar()
        return Covar_training

    def adjustFeatures(self, name, mainF):
        self.Training_info.adjustFeatures(name, mainF)
def cute_test(length):
    p_array = []
    counter = 0
    for i in range(0, 1000):
        #cause, effect = generate_continue_data(200, random.randint(1,3))  # random.randint(1,5)
        cause = GMM(5, 200)
        effect = GMM(8, 200)
        #effect, test2 = generate_continue_data(200, 3)  # random.randint(1,5)
        cause = change_to_zero_one(cause)
        effect = change_to_zero_one(effect)
        cause2effect = bernoulli2(effect, length) - cbernoulli2(effect, cause, length)
        effect2cause = bernoulli2(cause, length) - cbernoulli2(cause, effect, length)
        #print 'cause' + ' -> ' + 'effect' + ':' + str(cause2effect)
        #print 'effect' + ' -> ' + 'cause' + ':' + str(effect2cause)
        p = math.pow(2, -(cause2effect - effect2cause))
        p_array.append(p)
        if cause2effect > effect2cause:
            counter += 1
    print
    print counter
    print bh_procedure(p_array, 0.05)
    return counter / 100.0
def main():
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"
    train_data = np.loadtxt(open(pref_path + ".data", "rb"), delimiter=" ")
    test_data = np.loadtxt(open(pref_path + ".test", "rb"), delimiter=" ")
    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {"GMM": GMM(isotropic=False), "HMM": HMM()}
    K = 4  # number of clusters

    for name in ["GMM", "HMM"]:
        print(name)
        model = models[name]
        model.fit(Xtrain, K, eps=pow(10, -2))

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, "figs/" + name + " on train", save=True)
        model.plot_clusters(Xtest, "figs/" + name + " on test", save=True)
        print("")

        lik = model.compute_log_likelihood(Xtrain)
        print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])
        lik = model.compute_log_likelihood(Xtest)
        print("mean log-likelihood on test set : ", lik / Xtest.shape[0])
        print("\n------------------------\n")
def main():
    trainD, devD, testD = init()
    allD = Data(trainD, devD)

    if sys.argv[1] == "display":
        display(allD)
        exit(0)

    if sys.argv[1] == "train":  # local settings
        x = trainD.nx()
        y = trainD.ny()
        xx = devD.nx()
    else:  # submit settings
        x = allD.nx()
        y = allD.ny()
        xx = testD.nx()

    gmm1 = GMM(x[y == 1], round=500, K=4)
    gmm2 = GMM(x[y == 2], round=500, K=4)
    print("GMM1.dist: ", gmm1.pi)
    print("GMM2.dist: ", gmm2.pi)

    r1 = gmm1.predict(xx) * np.sum(y == 2)
    r2 = gmm2.predict(xx) * np.sum(y == 1)
    result = 1 + (r1 < r2) * 1

    if sys.argv[1] == "train":  # local settings
        print("accuracy: ", sum(result == devD.ny()) / devD.ny().shape[0])
    else:  # submit settings
        testD.y = list(result)
        testD.output()
def test_cluster_num():
    for i in [2, 3, 4, 5, 6]:
        Data = np.load("./Data/cluster_" + str(i) + ".npy")
        gmm = GMM(data=Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data=Data)
        vbem.train()
        vbem.show()
def test_sample_sizes():
    for i in [10, 20, 50, 100, 200, 300]:
        Data = np.load("./Data/data_" + str(i) + ".npy")
        gmm = GMM(data=Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data=Data)
        vbem.train()
        vbem.show()
def test_dimension():
    for i in [3, 4, 5, 6, 7]:
        Data = np.load("./Data/dimension_" + str(i) + ".npy")
        gmm = GMM(data=Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data=Data)
        vbem.train()
        vbem.show()
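# The three test_* functions above differ only in the file prefix and the value list.
# A minimal consolidation sketch, assuming the same GMM/VBEM interfaces used above;
# the helper name run_model_selection is hypothetical, not part of the original code:
def run_model_selection(prefix, values):
    for i in values:
        Data = np.load("./Data/" + prefix + "_" + str(i) + ".npy")
        gmm = GMM(data=Data)
        gmm.aic()
        gmm.bic()
        vbem = VBEM(data=Data)
        vbem.train()
        vbem.show()

# e.g. run_model_selection("cluster", [2, 3, 4, 5, 6]) reproduces test_cluster_num().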
def main():
    pref_path = os.getcwd() + "/classification_data_HWK2/EMGaussian"
    train_data = np.loadtxt(open(pref_path + ".data", "rb"), delimiter=" ")
    test_data = np.loadtxt(open(pref_path + ".test", "rb"), delimiter=" ")
    Xtrain = train_data[:, :2]
    Xtest = test_data[:, :2]

    models = {
        "Kmeans": Kmeans(),
        "GMM_general": GMM(isotropic=False),
        "GMM_isotropic": GMM(isotropic=True)
    }
    K = 4  # number of clusters

    for name in ["Kmeans", "GMM_isotropic", "GMM_general"]:
        print(name)
        model = models[name]
        model.fit(Xtrain, 4)

        # visualize clusters and frontiers
        model.plot_clusters(Xtrain, name + " on train", save=False)
        model.plot_clusters(Xtest, name + " on test", save=False)

        if name in ["GMM_general", "GMM_isotropic"]:
            lik = model.compute_log_likelihood(Xtrain)
            print("mean log-likelihood on training set : ", lik / Xtrain.shape[0])
            lik = model.compute_log_likelihood(Xtest)
            print("mean log-likelihood on test set : ", lik / Xtest.shape[0])
        print("")
def main(): # declare variables K_value = 3 max_epoch = 1000 repeat_num = 8 repeat_num_gmm = 1 name = 'GMM_dataset.txt' # get the training data training_data = data_loading(name) if (sys.argv[1] == 'kmean'): # run the K-mean program program_name = './a.out' parameter_line = ' ' + 'training_kmeans ' + str(K_value) + ' ' + str( repeat_num) + ' 0' print('Running K-mean') os.system(program_name + parameter_line) print('The Program is done.') # read minimum SSE Position min_sse_pos = np.loadtxt('./min_sse_pos.csv', delimiter=',', skiprows=0) # read clusters from K-mean algorithm kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv' kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0) for draw_ind in range(int(kmean_clusters.shape[0] / K_value)): # the last index: (int(kmean_clusters.shape[0] / K_value) - 1) start_index = draw_ind * K_value iter_kmean_clusters = kmean_clusters[start_index:start_index + K_value, :] # label assignment out_label, cov_list = label_assignment(iter_kmean_clusters, training_data) # save the figures plt.rcParams.update({'figure.max_open_warning': 0}) fig, ax = plt.subplots() ax.scatter(training_data[:, 0], training_data[:, 1], c=out_label, alpha=0.5) ax.scatter(iter_kmean_clusters[:, 0], iter_kmean_clusters[:, 1], c='b', s=100, alpha=0.5) plt.xlabel('Feature: x1') plt.ylabel('Feature: x2') plt.title('K-mean Clustering') fig.savefig('./kmean_result/iter' + str(draw_ind) + '.png') #fig.clf() elif (sys.argv[1] == 'gmm'): # read minimum SSE Position min_sse_pos = np.loadtxt('./min_sse_pos.csv', delimiter=',', skiprows=0) # read clusters from K-mean algorithm kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv' kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0) start_index = (int(kmean_clusters.shape[0] / K_value) - 1) * K_value iter_kmean_clusters = kmean_clusters[start_index:start_index + K_value, :] out_label, cov_list = label_assignment(iter_kmean_clusters, training_data) # call GMM class gmm = GMM(K_value, repeat_num_gmm, max_epoch, training_data) all_likelihood, parameters = gmm.model_training( iter_kmean_clusters, out_label, cov_list) true_mu, true_covariance = gmm.fit_true_model(training_data) # find the mu, covariance, and prior best_likelihood, all_mu, all_cov, all_prior = find_the_best( all_likelihood, parameters) # prediction phase prediction = gmm.model_predict(all_mu[-1], all_cov[-1], all_prior[-1], training_data) labels = label_GMM(prediction) # drawing gaussian functions for ind in range(len(all_mu)): out_para = drawing_Gaussian(all_mu[ind], all_cov[ind], training_data, all_mu[-1], 1, ind) if ind == (len(all_mu) - 1): # print out parameters of the Gaussian function for ind_2 in range(K_value): print('Cluster:', ind_2) print('Mu:') print(out_para[str(ind_2)][0]) print('Covariance:') print(out_para[str(ind_2)][1]) print('=====================') # drawing true gaussian functions if K_value == 3: out_param_true = drawing_Gaussian(true_mu, true_covariance, training_data, true_mu, 2, None) # print out parameters of the Gaussian function for ind_3 in range(K_value): print('Cluster:', ind_3) print('Actual Mu:') print(out_param_true[str(ind_3)][0]) print('Actual Covariance:') print(out_param_true[str(ind_3)][1]) print('=====================') # drawing log-likelihood values drawing_Log_likelihood(best_likelihood) elif (sys.argv[1] == 'saving'): # save the data as a csv file save_data_csv_file(training_data) else: print('Error Input. Please re-choose the task!!')
    mask = np.zeros_like(img)
    mask[:, :, 0] = probsReshaped
    mask[:, :, 1] = probsReshaped
    mask[:, :, 2] = probsReshaped
    return mask


if __name__ == '__main__':
    # Loading models
    path = 'models/'
    files = os.listdir(path)
    paths = ['Green_Resized/', 'Orange_Resized/', 'Yellow_Resized/']
    gmms = [GMM(nClusters=3), GMM(nClusters=3), GMM(nClusters=3)]  # TODO

    gParams, gMixture = None, None
    oParams, oMixture = None, None
    yParams, yMixture = None, None

    for file in files:
        filename = os.path.join(path, file)
        pickle_in = open(filename, "rb")
        model = pickle.load(pickle_in)
        for key, info in model.items():
            if key == paths[0]:
                gParams = info[0]
                gMixture = info[1]
@brief      TBD
@license    This project is released under the BSD-3-Clause license.
'''

import numpy as np
import cv2
from scipy.stats import multivariate_normal
from GMM import GMM

#image = cv2.imread('training_set/yellowTrainingSet.png')
image = cv2.imread('test_set/buoys.png')
height = image.shape[0]
width = image.shape[1]

yellowGMM = GMM()
yellowGMM.load('yellowGMM.npz')
orangeGMM = GMM()
orangeGMM.load('orangeGMM.npz')
greenGMM = GMM()
greenGMM.load('greenGMM.npz')

'''
yellowErrors = np.zeros((height, width))
orangeErrors = np.zeros((height, width))
greenErrors = np.zeros((height, width))
for i in range(height):
    for j in range(width):
        yellowErrors[i] = yellowGMM.getLogLikelihoodError(image[i,j])
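# A vectorized alternative to the commented-out per-pixel loop above, assuming the same
# getLogLikelihoodError(...) interface used by BuoyDetector.detectBuoys later in this
# collection (a sketch, not part of the original file):
pixels = np.reshape(image, (height * width, 3))
yellowErrors = np.reshape(yellowGMM.getLogLikelihoodError(pixels), (height, width))
orangeErrors = np.reshape(orangeGMM.getLogLikelihoodError(pixels), (height, width))
greenErrors = np.reshape(greenGMM.getLogLikelihoodError(pixels), (height, width))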
# connectivity matrix for structured Ward
connectivity = kneighbors_graph(X, n_neighbors=params['n_neighbors'],
                                include_self=False)
# make connectivity symmetric
connectivity = 0.5 * (connectivity + connectivity.T)

# ============
# Initialize all clustering algorithms
# ============
# Hand-written K-Means and GMM implementations
my_kmeans0 = K_Means(n_clusters=params['n_clusters'], fit_method=0)
my_kmeans1 = K_Means(n_clusters=params['n_clusters'], fit_method=1)
my_kmeans2 = K_Means(n_clusters=params['n_clusters'], fit_method=2)
my_kmeans3 = K_Means(n_clusters=params['n_clusters'], fit_method=3)
my_gmm = GMM(n_clusters=params['n_clusters'], dim=X.shape[1])
my_spectral_knn_reciprocal_normalized = SpectralClustering(
    n_clusters=params['n_clusters'], nnk=50)
my_spectral_radius_reciprocal_normalized = SpectralClustering(
    n_clusters=params['n_clusters'], use_radius_nn=True, nnradius=1)
my_spectral_knn_gauss05_normalized = SpectralClustering(
    n_clusters=params['n_clusters'], nnk=50, use_gauss_dist=True)
my_spectral_knn_gauss005_normalized = SpectralClustering(
    n_clusters=params['n_clusters'], nnk=50, use_gauss_dist=True,
    gauss_sigma=5e-2)
my_spectral_knn_reciprocal_unnormalized = SpectralClustering(
    n_clusters=params['n_clusters'], nnk=50, normalized=False)

# Algorithms built into sklearn
import numpy as np
import matplotlib.pyplot as plt
from GMM import GMM

if __name__ == '__main__':
    group_a = np.random.normal(loc=(20.00, 14.00), scale=(4.0, 4.0), size=(1000, 2))
    group_b = np.random.normal(loc=(15.00, 8.00), scale=(2.0, 2.0), size=(1000, 2))
    group_c = np.random.normal(loc=(30.00, 40.00), scale=(2.0, 2.0), size=(1000, 2))
    group_d = np.random.normal(loc=(25.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))
    group_e = np.random.normal(loc=(10.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))
    DATA = np.concatenate((group_a, group_b, group_c, group_d, group_e))

    S = GMM(5, DATA, 1e-3)
    S.fit()
    S.print_status()

    testdata = np.random.rand(10000, 2) * 50
    labels = S.Classify(testdata)
    plt.scatter(testdata[:, 0], testdata[:, 1],
                c=list(map(lambda i: {0: 'b', 1: 'g', 2: 'r', 3: 'y', 4: 'k'}[i], labels)))
    plt.show()
""" chapter9 EM algorithm for Gaussian Misture Model Using iris dataset for clustering """ import DatasetUtil as DS from HTMLTable import HTMLTable import re from GMM import GMM if __name__ == "__main__": print("\t============ Chap9 EM for GMM ============") ds = DS.DATAUtil() x_train, y_train = ds.load(True, r".\dataset.dat") model = GMM() model.train(x_train) y_pred = model.predict(x_train) y_train = ds.y_int2str(y_train) table = HTMLTable(caption='Iris Data Cluster') table.append_header_rows(( ('No.', 'A1', 'A2', 'A3', 'A4', 'Classification', ''), ('', '', '', '', '', 'Label-C', 'Predict-C'), )) table[0][0].attr.rowspan = 2 table[0][1].attr.rowspan = 2 table[0][2].attr.rowspan = 2 table[0][3].attr.rowspan = 2 table[0][4].attr.rowspan = 2
from GMM import GMM
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

X, y = make_blobs(n_samples=1000, centers=4, n_features=2)

gmm_cls = GMM(initializer='uniform', cov_type='diag')
gmm_cls.fit(X, 4)

colors = []
for l in gmm_cls.kmeans_cls_.predict(X):
    if l == 0:
        colors.append('red')
    if l == 1:
        colors.append('green')
    if l == 2:
        colors.append('orange')
    if l == 3:
        colors.append('blue')

plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.means_[:, 0], gmm_cls.means_[:, 1], c='k')
plt.show()

plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.kmeans_cls_.means_[:, 0], gmm_cls.kmeans_cls_.means_[:, 1], c='k')
plt.show()
class Classifier(object): """docstring for Classifier.""" FACELET_ORDER = [ 42, 39, 36, 43, 40, 37, 44, 41, 38, # U 9, 10, 11, 12, 13, 14, 15, 16, 17, # R 8, 7, 6, 5, 4, 3, 2, 1, 0, # F 45, 46, 47, 48, 49, 50, 51, 52, 53, # D 27, 28, 29, 30, 31, 32, 33, 34, 35, # L 26, 25, 24, 23, 22, 21, 20, 19, 18, # B ] def __init__(self, face_order='URFDLB'): self._fig = pl.figure() self._axis = self._fig.add_subplot(111, projection='3d') self._face_order = face_order def fit(self, raw_colors): self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER] np.save('X', self._X) for label, color in enumerate(self._X): color[0], color[2] = color[2], color[0] self._axis.scatter(*color, color=color / 384) pl.show() self._X_pca = PCA(n_components=2).fit_transform(self._X) print(self._X_pca) np.save('pca', self._X_pca) for i, point in enumerate(self._X_pca): pl.scatter(*point, color=self._X[i] / 384) pl.show() centroids = np.array([self._X_pca[i] for i in range(4, 54, 9)]) for i, point in enumerate(centroids): pl.scatter(*point, color=self._X[9 * i + 4] / 384) pl.show() print(centroids) self._model = KMeans(n_clusters=6, init=centroids) # self._model = GaussianMixture(n_components=6, means_init=centroids) self._model.fit(self._X_pca) def fit2(self, raw_colors): from sklearn.mixture import GaussianMixture self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER] for label, color in enumerate(self._X): color[0], color[2] = color[2], color[0] self._axis.scatter(*color, color=color / 384) pl.show() centroids = np.array([self._X[i] for i in range(4, 54, 9)]) print(centroids) self._model = GaussianMixture(n_components=6, means_init=centroids) self._model.fit(self._X) self._X_pca = self._X def fit3(self, raw_colors): self._X = [raw_colors[i] for i in Classifier.FACELET_ORDER] np.save('X', self._X) X = np.load('X.npy').astype('uint8') for i in range(len(X)): X[i][0], X[i][2] = X[i][2], X[i][0] X_hsv = X[np.newaxis, ...] 
hsv = cv2.cvtColor(X_hsv, cv2.COLOR_RGB2HSV)[0] centroid_id = list(range(4, 54, 9)) feats = hsv[:, ::2] ax = pl.subplot(111) for i, c in enumerate(feats): if i % 9 == 4: # print(type(raw_colors[i])) pl.scatter(*feats[i], color=X[i] / 255.0, marker='x') else: pl.scatter(*feats[i], color=X[i] / 255.0, marker='o') pl.show() alphas = np.empty((6, 1)) means = np.empty((6, feats.shape[1])) covs = np.empty((6, feats.shape[1], feats.shape[1])) for i in range(6): mean = feats[9 * i + 4] cov = np.array([[0.5 * 10, 0.0], [0.0, 2.5 * 10]]) alphas[i] = 1 / 6.0 means[i] = mean covs[i] = cov print('alpha:', alphas[i]) print('mean:', mean) print('cov:', cov) self._gmm = GMM(6, feats, mu=means, sigma=covs, alpha=alphas) self._gmm.execute() print(self._gmm.alpha) print(self._gmm.mu) print(self._gmm.sigma) self._ans = [] for xi in feats: probas = np.array([ self._gmm.Normal(xi, self._gmm.mu[k], self._gmm.sigma[k], len(xi)) for k in range(6) ]) self._ans.append(probas.argmax()) print(np.array(self._ans).reshape(6, 3, 3)) def eigsorted(cov): vals, vecs = np.linalg.eigh(cov) order = vals.argsort()[::-1] return vals[order], vecs[:, order] nstd = 2 ax = pl.subplot(111) for i, c in enumerate(self._ans): if i % 9 == 4: pl.scatter(*feats[i], color=X[centroid_id[c]] / 255.0, marker='x') else: pl.scatter(*feats[i], color=X[centroid_id[c]] / 255.0, marker='o') for k in range(6): cov = self._gmm.sigma[k] vals, vecs = eigsorted(cov) theta = np.degrees(np.arctan2(*vecs[:, 0][::-1])) w, h = 2 * nstd * np.sqrt(vals) ell = mpl.patches.Ellipse(xy=self._gmm.mu[k], width=w, height=h, angle=theta, color=X[centroid_id[k]] / 255.0, alpha=0.5) ell.set_facecolor(X[centroid_id[k]] / 255.0) ax.add_artist(ell) for k in range(6): cov = covs[k] vals, vecs = eigsorted(cov) theta = np.degrees(np.arctan2(*vecs[:, 0][::-1])) w, h = 2 * nstd * np.sqrt(vals) ell = mpl.patches.Ellipse(xy=self._gmm.mu[k], width=w, height=h, angle=theta, color=X[centroid_id[k]] / 255.0, alpha=0.5) # ell.set_facecolor(X[centroid_id[k]]/255.0) ax.add_artist(ell) pl.show() def get_state(self): # if self._X is None: # raise Error('Se debe hacer fit primero.') # pred2 = self._model.predict(self._X_pca) # state = ''.join([self._face_order[i] for i in pred2]) # print(state) # pred2 = pred2.reshape(-1, 3, 3) # print(pred2) # return state FACES = 'URFDLB' state = ''.join([FACES[i] for i in np.array(self._ans).reshape(-1)]) print(state) return state
class BuoyDetector: yellowGMM = GMM() orangeGMM = GMM() greenGMM = GMM() def __init__(self, yellowGMMParams, orangeGMMParams, greenGMMParams): self.yellowGMM.load(yellowGMMParams) self.orangeGMM.load(orangeGMMParams) self.greenGMM.load(greenGMMParams) def detectBuoys(self, frame): image = frame.copy() height = image.shape[0] width = image.shape[1] #print('BP1') yellowMask = np.zeros((height, width)).astype('uint8') orangeMask = np.zeros((height, width)).astype('uint8') greenMask = np.zeros((height, width)).astype('uint8') #print('BP2') yellowErrors = self.yellowGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3))) orangeErrors = self.orangeGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3))) greenErrors = self.greenGMM.getLogLikelihoodError(np.reshape(image, (height*width, 3))) yellowErrors = np.reshape(yellowErrors, (height, width)) orangeErrors = np.reshape(orangeErrors, (height, width)) greenErrors = np.reshape(greenErrors, (height, width)) for i in range(height): for j in range(width): yellowError = yellowErrors[i,j] orangeError = orangeErrors[i,j] greenError = greenErrors[i,j] if (yellowError > 11 and orangeError > 14 and greenError > 12.5): continue elif (yellowError == min(yellowError, orangeError, greenError)): yellowMask[i,j] = 255 elif (orangeError == min(yellowError, orangeError, greenError)): orangeMask[i,j] = 255 elif (greenError == min(yellowError, orangeError, greenError)): greenMask[i,j] = 255 yellowMask = cv2.erode(yellowMask, None, iterations=1) yellowMask = cv2.dilate(yellowMask, None, iterations=2) orangeMask = cv2.erode(orangeMask, None, iterations=1) orangeMask = cv2.dilate(orangeMask, None, iterations=2) greenMask = cv2.erode(greenMask, None, iterations=1) greenMask = cv2.dilate(greenMask, None, iterations=2) yellowContours, hierarchy = cv2.findContours(yellowMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) orangeContours, hierarchy = cv2.findContours(orangeMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) greenContours, hierarchy = cv2.findContours(greenMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if (len(yellowContours) != 0): maxContour = max(yellowContours, key = cv2.contourArea) center, radius = cv2.minEnclosingCircle(maxContour) cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \ color=(0, 255, 255), thickness=2) cv2.circle(image, (int(center[0]), int(center[1])), 1, \ color=(0, 0, 255), thickness=1) #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 255, 255), thickness=2) if (len(orangeContours) != 0): maxContour = max(orangeContours, key = cv2.contourArea) center, radius = cv2.minEnclosingCircle(maxContour) cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \ color=(0, 125, 255), thickness=2) cv2.circle(image, (int(center[0]), int(center[1])), 1, \ color=(0, 0, 255), thickness=1) #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 125, 255), thickness=2) if (len(greenContours) != 0): maxContour = max(greenContours, key = cv2.contourArea) center, radius = cv2.minEnclosingCircle(maxContour) cv2.circle(image, (int(center[0]), int(center[1])), int(radius), \ color=(0, 255, 0), thickness=2) cv2.circle(image, (int(center[0]), int(center[1])), 1, \ color=(0, 0, 255), thickness=1) #cv2.drawContours(image, [maxContour], contourIdx=-1, color=(0, 255, 0), thickness=2) return image def runApplication(self, videoFile, saveVideo=False): # Create video stream object videoCapture = cv2.VideoCapture(videoFile) # Define video codec and output file if video needs to be saved if (saveVideo == True): fourcc = 
cv2.VideoWriter_fourcc(*'mp4v') # 720p 30fps video out = cv2.VideoWriter('BuoyDetection.mp4', fourcc, 30, (1280, 720)) # Continue to process frames if the video stream object is open while(videoCapture.isOpened()): ret, frame = videoCapture.read() # Continue processing if a valid frame is received if ret == True: newFrame = self.detectBuoys(frame) # Save video if desired, resizing frame to 720p if (saveVideo == True): out.write(cv2.resize(newFrame, (1280, 720))) # Display frame to the screen in a video preview cv2.imshow("Frame", cv2.resize(newFrame, (1280, 720))) # Exit if the user presses 'q' if cv2.waitKey(1) & 0xFF == ord('q'): break # If the end of the video is reached, wait for final user keypress and exit else: cv2.waitKey(0) break # Release video and file object handles videoCapture.release() if (saveVideo == True): out.release() print('Video and file handles closed')
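# A minimal usage sketch for the BuoyDetector class above, assuming the yellowGMM.npz /
# orangeGMM.npz / greenGMM.npz parameter files referenced elsewhere in this collection;
# the video filename is illustrative, not taken from the original code:
if __name__ == '__main__':
    detector = BuoyDetector('yellowGMM.npz', 'orangeGMM.npz', 'greenGMM.npz')
    detector.runApplication('buoyVideo.mp4', saveVideo=False)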
kmeans_obj = KMeans(3, x)
kmeans_obj.fit(3, 0.002)
means = kmeans_obj.mean_vec
cov_mat_list = kmeans_obj.CovMatrix()
mixture_coeff = kmeans_obj.MixtureCoeff()
print(cov_mat_list)

"""from sklearn.cluster import KMeans
obj = KMeans(n_clusters=3, init='k-means++', max_iter=100, n_init=10, random_state=0)
y_Kmeans = obj.fit_predict(x)
print(obj.cluster_centers_[:])"""

GMM_obj = GMM(3, x, means, cov_mat_list, mixture_coeff)
GMM_obj.fit(0.0002)
print(GMM_obj.mean_vec)
print(GMM_obj.cov_mat)
print(GMM_obj.mixture_coeff)

y_pred = GMM_obj.ClusterPredict(x)
plt.scatter(GMM_obj.x_train[y_pred == 0, 0], GMM_obj.x_train[y_pred == 0, 1], s=20, c='red', label='Cluster 1')
plt.scatter(GMM_obj.x_train[y_pred == 1, 0], GMM_obj.x_train[y_pred == 1, 1], s=20, c='green', label='Cluster 2')
plt.scatter(GMM_obj.x_train[y_pred == 2, 0], GMM_obj.x_train[y_pred == 2, 1], s=20, c='blue', label='Cluster 3')
plt.scatter(GMM_obj.mean_vec[:, 0], GMM_obj.mean_vec[:, 1], s=50, c='yellow', label='Centroids')
plt.show()

plt.scatter(GMM_obj.x_train[:, 0], GMM_obj.x_train[:, 1])
plt.show()
def xai_feature(self, samp_num, option= 'None'): """extract the important features from the input data Arg: fea_num: number of features that needed by the user samp_num: number of data used for explanation return: fea: extracted features """ print '----------------------------------------------------' print "parameters:" print "data:",self.data print "data shape:", self.data.shape print "seq_len:", self.seq_len print "start:", self.start print "sp:", self.sp print "real_sp:", self.real_sp print "pred:", self.pred print "trunc_len:", self.tl print "trunc_data",self.trunc_data print "trunc_data_test", self.trunc_data_test print '----------------------------------------------------' cen = self.seq_len/2 half_tl = self.tl/2 sample = np.random.randint(1, self.tl+1, samp_num) print "sample:",sample features_range = range(self.tl+1) data_explain = np.copy(self.trunc_data).reshape(1, self.trunc_data.shape[0]) data_sampled = np.copy(self.trunc_data_test) for i, size in enumerate(sample, start=1): inactive = np.random.choice(features_range, size, replace=False) #print '\ninactive --->',inactive tmp_sampled = np.copy(self.trunc_data) tmp_sampled[inactive] = 0 #tmp_sampled[inactive] = np.random.choice(range(257), size, replace = False) #print "trunc_data.shape", self.trunc_data.shape tmp_sampled = tmp_sampled.reshape(1, self.trunc_data.shape[0]) data_explain = np.concatenate((data_explain, tmp_sampled), axis=0) #print "data_explain.shape", data_explain.shape data_sampled_mutate = np.copy(self.data) if self.real_sp < half_tl: data_sampled_mutate[0, 0:tmp_sampled.shape[1]] = tmp_sampled elif self.real_sp >= self.seq_len - half_tl: data_sampled_mutate[0, (self.seq_len - tmp_sampled.shape[1]): self.seq_len] = tmp_sampled else: data_sampled_mutate[0, (self.real_sp - half_tl):(self.real_sp + half_tl + 1)] = tmp_sampled data_sampled = np.concatenate((data_sampled, data_sampled_mutate),axis=0) if option == "Fixed": print "Fix start points" data_sampled[:, self.real_sp] = self.start label_sampled = self.model.predict(data_sampled, verbose = 0)[:, self.real_sp, 1] label_sampled = label_sampled.reshape(label_sampled.shape[0], 1) #X = r.matrix(data_explain, nrow = data_explain.shape[0], ncol = data_explain.shape[1]) #Y = r.matrix(label_sampled, nrow = label_sampled.shape[0], ncol = label_sampled.shape[1]) #n = r.nrow(X) #print "n:", n #p = r.ncol(X) #print "p:", p #print "np.sqrt(n*np.log(p)):", np.sqrt(n*np.log(p)) #print "X_shape", X.dim #print "Y_shape", Y.dim #Mixture model fitting gmm = GMM(label_sampled,n_components=2).fit(data_explain) print gmm.converged_ means = gmm.means_ covariances = gmm.covariances_ r_ik = np.zeros((samp_num+1,2)) k=-1 for m,c in zip(means,covariances): k += 1 reg_cov = 5e-5*np.identity(self.tl+1) c = c + reg_cov #print "C:", c multi_normal = multivariate_normal(mean=m,cov=c) r_ik[:,k] = gmm.weights_[k] * multi_normal.pdf(data_explain) #mat_norm = np.zeros((501,501)) #np.fill_diagonal(mat_norm, 1/np.sum(r_ik,axis=1)) #P = mat_norm.dot(r_ik) res = np.argmax(r_ik, axis=1) # find the index for the best component best_component_idx = res[0] # fitting beta according to best component of mixture regression model # get the data for this component idx=np.where(res==best_component_idx)[0] X = r.matrix(data_explain[idx], nrow = len(idx), ncol = self.tl+1) Y = r.matrix(label_sampled[idx], nrow = len(idx), ncol = 1) n = r.nrow(X) print "n:", n p = r.ncol(X) print "p:", p print "np.sqrt(n*np.log(p)):", np.sqrt(n*np.log(p)) # solve fused lasso by r library and get the importance score from the 
results print "X_shape", X.dim print "Y_shape", Y.dim results = r.fusedlasso1d(y=Y, X=X) #print "result_i", result_i result = np.array(r.coef(results, np.sqrt(n*np.log(p)))[0])[:,-1] print "result:", result #results = r.fusedlasso1d(y=Y,X=X) #result = np.array(r.coef(results, np.sqrt(n*np.log(p)))[0])[:,-1] # sorting the importance_score and return the important features importance_score = np.argsort(result)[::-1] print 'importance_score ...',importance_score self.fea = (importance_score-self.tl/2)+self.real_sp self.fea = self.fea[np.where(self.fea<200)] self.fea = self.fea[np.where(self.fea>=0)] print 'self.fea ...',self.fea return self.fea
BETA = 1

# set random seed to reproduce
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Get data
log = logger()
label_name = os.path.join("data", "labels")
data_name = os.path.join("data", "data_set")
labels = log.unpickle(label_name)
labels = np.expand_dims(labels, axis=-1)
data_set = log.unpickle(data_name)

# Initialize the models and optimizers
gmm_model = GMM(data_set.shape[1], K=K)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)  # Low learning rate needed!


# train function
@tf.function  # --> optimizes the program by making a graph
def train_models(data, label):
    with tf.GradientTape(persistent=True) as tape:
        sample, prob, mean, logvar = gmm_model(label)
        log_likelihood = gmm_model.log_likelihood(data, prob, mean, logvar)
    grad = tape.gradient(log_likelihood, gmm_model.variables)
    optimizer.apply_gradients(zip(grad, gmm_model.variables))
    del tape
    return log_likelihood
plt.plot(x2[:, 0], x2[:, 1], 'o')
plt.plot(np.mean(x2[:, 0]), np.mean(x2[:, 1]), 'x', color='black')
plt.text(np.mean(x2[:, 0]), np.mean(x2[:, 1]), '$\mu_{e}$')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Labelled data and corresponding means')
plt.show()

M = 2
max_iter = 200
tol = 1e-3
diagonal = False

gmm = GMM(X, M)

# run the K-means algorithm first to initialize the means of the GMM algorithm
# mu_0 = random.sample(list(X), M)
# mu_0, D = k_means(X, M, mu_0=mu_0, max_iter=max_iter, tol=tol, interactive=False)

# 1.) EM algorithm for GMM:
# TODO
L = gmm.EM(max_iter=max_iter, tol=tol, interactive=False, diagonal=False)

plt.ioff()
plt.plot(L)
plt.xlabel('Iteration')
plt.ylabel('Value')
plt.title('EM log-likelihood function')
plt.show()
CURL_TRAIN_SIZE = 50 * N_CENTERS
CURL_TEST_SIZE = 1000
BATCH_SIZE = 100

# Model
INPUT_DIM = DIM
HIDDEN_DIM = 20
OUT_DIM = HIDDEN_DIM

# Training
N_EPOCH = 1000
LR = 1e-3

# Data generation
CENTERS = torch.randn(N_CENTERS * DIM).view(N_CENTERS, DIM)
gmm = GMM(DIM, CENTERS, VARIANCE)
X_train, y_train = gmm.sample(TRAIN_SAMPLES)
X_test, y_test = gmm.sample(TEST_SAMPLES)

train_CURL = ContrastiveDataset(*build_CURL_dataset(X_train, y_train, CURL_TRAIN_SIZE))
assert len(train_CURL) == CURL_TRAIN_SIZE
test_CURL = ContrastiveDataset(*build_CURL_dataset(X_test, y_test, CURL_TEST_SIZE))

train_data = GMMDataset(X_train, y_train)
test_data = GMMDataset(X_test, y_test)
train_loader = DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_data, shuffle=False, batch_size=BATCH_SIZE)
def main(): Parser = argparse.ArgumentParser() Parser.add_argument('datagenModule', type=str) Parser.add_argument('algName', type=str) #TODO:Parser.add_argument( 'modelName', type=str ) Parser.add_argument('-K', '--K', type=int, default=3) Parser.add_argument('--alpha0', type=float, default=1.0) Parser.add_argument('--covar_type', type=str, default='full') Parser.add_argument('--min_covar', type=float, default=1e-9) # Batch learning args Parser.add_argument('--nIter', type=int, default=100) # Online learning args Parser.add_argument('--batch_size', type=int, default=100) Parser.add_argument('--nBatch', type=int, default=50) Parser.add_argument('--nRep', type=int, default=1) Parser.add_argument('--rhoexp', type=float, default=0.5) Parser.add_argument('--rhodelay', type=float, default=1) # Generic args Parser.add_argument('--jobname', type=str, default='defaultjob') Parser.add_argument('--taskid', type=int, default=1) Parser.add_argument('--nTask', type=int, default=1) Parser.add_argument('--initname', type=str, default='random') Parser.add_argument('--seed', type=int, default=8675309) Parser.add_argument('-v', '--doVerbose', action='store_true', default=False) Parser.add_argument('--printEvery', type=int, default=5) Parser.add_argument('--saveEvery', type=int, default=10) Parser.add_argument('--doProfile', action='store_true', default=False) args = Parser.parse_args() modelParams = dict() for argName in ['K', 'covar_type', 'min_covar', 'alpha0']: modelParams[argName] = args.__getattribute__(argName) dataParams = dict() for argName in ['nBatch', 'nRep', 'batch_size', 'seed']: dataParams[argName] = args.__getattribute__(argName) algParams = dict() for argName in ['initname', 'nIter', 'rhoexp', 'rhodelay', \ 'nIter', 'printEvery', 'saveEvery']: algParams[argName] = args.__getattribute__(argName) # Dynamically load module provided by user as data-generator # this must implement a generator function called "minibatch_generator" or "get_data" datagenmod = __import__('GMM.data.' 
+ args.datagenModule, fromlist=['GMM', 'data']) if 'print_data_info' in dir(datagenmod): datagenmod.print_data_info() gmm = GMM.GMM(**modelParams) gmm.print_model_info() for task in xrange(args.taskid, args.taskid + args.nTask): basepath = os.path.join('results', args.algName, args.jobname, str(task)) mkpath(basepath) algParams['savefilename'] = os.path.join(basepath, 'trace') seed = hash(args.jobname + str(task)) % np.iinfo(int).max algParams['seed'] = seed print 'Trial %2d/%d | savefile: %s | seed: %d' % ( task, args.nTask, algParams['savefilename'], algParams['seed']) if args.algName.startswith('o') and args.algName.count('EM') > 0: DataGen = datagenmod.minibatch_generator(**dataParams) gmm = GMM.GMM(**modelParams) em = OEM.OnlineEMLearnerGMM(gmm, **algParams) em.fit(DataGen, seed) elif args.algName.count('sklearnEM') > 0: sklgmm = sklearn.mixture.GMM( n_components=args.K, random_state=seed, covariance_type=args.covar_type, \ min_covar=args.min_covar, n_init=1, n_iter=args.nIter, init_params='' ) X = datagenmod.get_data(**dataParams) gmm = GMM.GMM(**modelParams) em = EM.EMLearnerGMM(gmm, **algParams) em.init_params(X, seed=seed) sklgmm.weights_ = gmm.w sklgmm.means_ = gmm.mu sklgmm.covars_ = gmm.Sigma sklgmm.fit(X) elif args.algName.count('EM') > 0: Data = datagenmod.get_data(**dataParams) gmm = GMM.GMM(**modelParams) em = EM.EMLearnerGMM(gmm, **algParams) em.fit(Data, seed) elif args.algName.count('DPVB') > 0: Data = datagenmod.get_data(**dataParams) D = Data.shape[1] gw = GaussWishDistr.GaussWishDistr(D=D) qdp = QDPGMM.QDPGMM(gw, **modelParams) em = VB.VBLearnerGMM(qdp, **algParams) em.fit(Data, seed) elif args.algName.count('VB') > 0: Data = datagenmod.get_data(**dataParams) D = Data.shape[1] dF = D + 1 invW = np.eye(D) gw = GaussWishDistr.GaussWishDistr(D=D) qgmm = QGMM.QGMM(gw, **modelParams) em = VB.VBLearnerGMM(qgmm, **algParams) em.fit(Data, seed)
from GMM import GMM
from sklearn import mixture

# generate the dataset
X, Y = make_classification(n_samples=1000, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=2)
X = preprocessing.scale(X)

num_clusters = 3
num_epochs = 50

gmm_model = GMM()
phi, pi_dist, mean, covariance = gmm_model.fit(X, num_clusters=num_clusters,
                                               num_epochs=num_epochs)

gmm_sklearn = mixture.GaussianMixture(n_components=2)
gmm_sklearn.fit(X)

plt.figure(figsize=(8, 8))
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.9)

plt.subplot(211)
plt.title('Plot for the unclustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=None)

plt.subplot(212)
plt.title('Plot for the clustered data', fontsize='small')
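# The snippet above is cut off after the second subplot title. A minimal completion
# sketch, assuming the clustered panel is colored by the sklearn mixture's hard
# assignments (this continuation is illustrative, not part of the original file):
plt.scatter(X[:, 0], X[:, 1], s=25, c=gmm_sklearn.predict(X))
plt.show()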
i = np.random.randint(0, nSamples - 1, nComp)
means = samples[i]
covars = np.empty((nComp, 3), np.float32)
covars[:] = 10

gmm_cpu = mixture.GMM(nComp)
gmm_cpu.dtype = np.float32
gmm_cpu.init_params = ''
gmm_cpu.means_ = means
gmm_cpu.weights_ = weights
gmm_cpu.covars_ = covars
gmm_cpu.fit(samples)

gmm = GMM(context, nIter, nComp, nSamples)
a = calcA_cpu(weights, means, covars)
cl.enqueue_copy(queue, gmm.dA, a).wait()
gmm.has_preset_wmc = True
w, m, c = gmm.fit(dSamples, nSamples, retParams=True)

print 'converged: {0}'.format(gmm.has_converged)

print gmm_cpu.weights_
print w
print

print gmm_cpu.means_
print m
print

print gmm_cpu.covars_
# 2. Re-classification
#=======================================================================
from GMM import GMM
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from util import *

# Build the clustering data: X holds the features, Y the corresponding labels;
# the generated points form two half-moons
X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0)

# Split the data: one part is used to train the clustering, the other for classification
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

model = GMM(X_train, K=10)
# get the probability of each class
result = model.fit()
print('Probability of each sample belonging to each class: ', result)

# get the cluster each training sample belongs to
label_train = np.argmax(result, axis=1)
print(label_train)

# get the class probabilities of the test data
result_test = model.predict(X_test)
# get the classes of the test data
label_test = np.argmax(result_test, axis=1)

# show the original data distribution and its labels
ax1 = plt.subplot(211)
import numpy as np
import matplotlib.pyplot as plt
from GMM import GMM

if __name__ == '__main__':
    group_a = np.random.normal(loc=(20.00, 14.00), scale=(4.0, 4.0), size=(1000, 2))
    group_b = np.random.normal(loc=(15.00, 8.00), scale=(2.0, 2.0), size=(1000, 2))
    group_c = np.random.normal(loc=(30.00, 40.00), scale=(2.0, 2.0), size=(1000, 2))
    group_d = np.random.normal(loc=(25.00, 32.00), scale=(7.0, 7.0), size=(1000, 2))
    data = np.concatenate((group_a, group_b, group_c, group_d))

    g = GMM(n_components=4)
    eval_train = g.train(data)

    for c in g.components:
        print '*****'
        print c.mean
        print c.cov

    plt.plot(eval_train)
    plt.show()
# estimate bandwidth for mean shift
bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])

# connectivity matrix for structured Ward
connectivity = kneighbors_graph(X, n_neighbors=params['n_neighbors'], include_self=False)
# make connectivity symmetric
connectivity = 0.5 * (connectivity + connectivity.T)

# ============
# Initialize all clustering algorithms
# ============
# Hand-written K-Means and GMM implementations
my_kmeans = K_Means(n_clusters=params['n_clusters'])
my_gmm = GMM(n_clusters=params['n_clusters'])
my_spec = Spectral()

# Algorithms built into sklearn
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
ward = cluster.AgglomerativeClustering(n_clusters=params['n_clusters'],
                                       linkage='ward',
                                       connectivity=connectivity)
spectral = cluster.SpectralClustering(n_clusters=params['n_clusters'],
                                      eigen_solver='arpack',
                                      affinity="nearest_neighbors")
dbscan = cluster.DBSCAN(eps=params['eps'])
optics = cluster.OPTICS(min_samples=params['min_samples'],
                        xi=params['xi'],
                        min_cluster_size=params['min_cluster_size'])
affinity_propagation = cluster.AffinityPropagation(
class QuickBrush(Brush): lWorksize = (16, 16) def __init__(self, context, devices, d_img, d_labels): Brush.__init__(self, context, devices, d_labels) self.context = context self.queue = cl.CommandQueue(context, properties=cl.command_queue_properties.PROFILING_ENABLE) nComponentsFg = 4 nComponentsBg = 4 self.nDim = 3 self.dim = d_img.dim filename = os.path.join(os.path.dirname(__file__), 'quick.cl') program = createProgram(context, context.devices, [], filename) # self.kernSampleBg = cl.Kernel(program, 'sampleBg') self.kern_get_samples = cl.Kernel(program, 'get_samples') self.lWorksize = (16, 16) self.gWorksize = roundUp(self.dim, self.lWorksize) nSamples = 4 * (self.gWorksize[0] / self.lWorksize[0]) * ( self.gWorksize[1] / self.lWorksize[1]) # self.gmmFg_cpu = mixture.GMM(4) self.gmmFg = GMM(context, 65, nComponentsFg, 10240) self.gmmBg = GMM(context, 65, nComponentsBg, nSamples) self.hScore = np.empty(self.dim, np.float32) self.hSampleFg = np.empty((10240, ), np.uint32) self.hSampleBg = np.empty((12000, ), np.uint32) self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32) self.d_img = d_img cm = cl.mem_flags self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * 10240) self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * 12000) self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=self.hA) self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32) self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32) #self.points = Set() self.capPoints = 200 * 200 * 300 #brush radius 200, stroke length 300 self.points = np.empty((self.capPoints), np.uint32) # self.colorize = Colorize.Colorize(clContext, clContext.devices) # self.hTriFlat = self.hTri.reshape(-1) # self.probBg(1200) self.h_img = np.empty(self.dim, np.uint32) self.h_img = self.h_img.ravel() cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0), region=self.dim).wait() self.samples_bg_idx = np.random.randint(0, self.dim[0] * self.dim[1], 12000) self.hSampleBg = self.h_img[self.samples_bg_idx] cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait() w,m,c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True) print w print m print c self.gmmBg.score(self.d_img, self.dScoreBg) pass def draw(self, p0, p1): Brush.draw(self, p0, p1) #self.probFg(x1-20, x1+20, y1-20, y1+20) #return """color = self.colorTri[self.type] #self.argsScore[5] = np.int32(self.nComponentsFg) #seed = [] hasSeeds = False redoBg = False minX = sys.maxint maxX = -sys.maxint minY = sys.maxint maxY = -sys.maxint for point in self.points[0:nPoints]: #if self.hTriFlat[point] != color: self.hTriFlat[point] = color #seed += point hasSeeds = True minX = min(minX, point%self.width) maxX = max(maxX, point%self.width) minY = min(minY, point/self.width) maxY = max(maxY, point/self.width) #if (point[1]*self.width + point[0]) in self.randIdx: # redoBg = True #if redoBg: # self.probBg(0) #if len(seed) == 0: if not hasSeeds: return minX = max(0, minX-DILATE) maxX = min(self.width-1, maxX + DILATE) minY = max(0, minY-DILATE) maxY = min(self.height-1, maxY + DILATE) """ args = [ np.int32(self.n_points), self.d_points, cl.Sampler(self.context, False, cl.addressing_mode.NONE, cl.filter_mode.NEAREST), self.d_img, self.dSampleFg ] gWorksize = roundUp((self.n_points, ), (256, )) self.kern_get_samples(self.queue, gWorksize, (256,), *args).wait() cl.enqueue_copy(self.queue, self.hSampleFg, self.dSampleFg) # print self.hSampleFg.view(np.uint8).reshape(10240, 4)[0:self.n_points, :] # print self.n_points 
self.gmmFg.fit(self.dSampleFg, self.n_points) # print w # print m # print c self.gmmFg.score(self.d_img, self.dScoreFg) # self.argsSampleBg = [ # self.d_labels, # np.int32(self.label), # cl.Sampler(self.context, False, cl.addressing_mode.NONE, # cl.filter_mode.NEAREST), # self.d_img, # self.dSampleFg # ] # # gWorksize = roundUp(self.dim, (16, 16)) # # self.kernSampleBg(self.queue, gWorksize, (16, 16), # *(self.argsSampleBg)).wait() # cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait() pass def probFg(self, d_samples, n_points): # if True: # tri = self.hTri[minY:maxY, minX:maxX] # b = (tri == self.colorTri[self.type]) # # samplesFg = self.hSrc[minY:maxY, minX:maxX] # samplesFg = samplesFg[b] # else: # DILATE = 5 # samplesFg = self.hSrc[minY:maxY, minX:maxX].ravel() #gpu = False #self.prob(self.gmmFG, samplesFg, self.dScoreFg, gpu) #self.gmmFg_cpu.fit(samplesFg) #print 'cpu', self.gmmFg_cpu.weights_ #a = calcA_cpu(self.gmmFg_cpu.weights_.astype(np.float32), self.gmmFg_cpu.means_.astype(np.float32), self.gmmFg_cpu.covars_.astype(np.float32)) #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait() #weights, means, covars = self.gmmFg.fit(samplesFg, retParams=True) #a = calcA_cpu(weights, means[:, 0:3], covars[:, 0:3]) #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait() w,m,c = self.gmmFg.fit(d_samples, n_points, retParams=True) print w print m print c #print 'gpu', weights self.gmmFg.score(self.d_img, self.dScoreFg) #score returns float64, not float32 -> convert with astype #self.hScore = -self.gmmFG.score(self.rgb.reshape(-1, 3)).astype(np.float32) """ def drawCircle(self, xc, yc, points=None): r = self.radius for y in xrange(-r, r): for x in xrange(-r, r): if points != None: points.add((xc+x, yc+y)) """ def probBg(self, nSamples): #self.kernSampleBg(self.queue, self.gWorksize, self.lWorksize, *(self.argsSampleBg)).wait() #cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait() self.bgIdx = np.where(self.hTri.ravel() != self.colorTri[self.type])[0] self.randIdx = self.bgIdx[np.random.randint(0, len(self.bgIdx), 2000)] self.bgIdx = np.setdiff1d(self.bgIdx, self.randIdx) self.hSampleBg[0:len(self.randIdx)] = self.hSrc.view(np.uint32).ravel()[ self.randIdx] cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait() #print self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx], retParams=True) self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx]) #self.gmmBg.fit(self.dSampleBg, nSamples=len(self.randIdx)) self.gmmBg.score(self.dSrc, self.dScoreBg)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou
"""
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
from GMM import GMM

mix = GMM(K=6)
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)

mix.fit(X)
mix.Means()
Y = mix.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=Y)
def fit(self, X, K, eps=pow(10, -2)): # fits the parameters of the HMM using EM algorithm # X is the sequence of observations (array of size (T,D)), # K is the number of hidden states # eps : tolerance on log likelihood difference between two iterations for convergence of EM algorithm self.K = K T, D = X.shape # initialization of means and covariances with GMM print( "Initialization of Gaussians parameters (means and covariances) with GMM : " ) gmm_model = GMM(isotropic=False) gmm_model.fit(X, K, eps=eps) self.mus = gmm_model.mus self.Sigmas2 = gmm_model.Sigmas2 print("\nFit of HMM : ") # initialization of pis and A at random self.pis = np.random.rand(self.K) self.pis /= np.sum(self.pis) self.A = np.random.rand(self.K, self.K) self.A /= np.sum(self.A, axis=1)[:, None] lik = self.compute_log_likelihood(X) print("Initial log-likelihood : ", lik) delta_lik = 1 cpt_iter = 1 while (delta_lik > eps): # Expectation step pi = self.compute_proba_Zt_cond_X( X) # array (T,K) (t,i) -> p(z_t = i|X; θ) pij = self.compute_proba_Zt_and_Znext_cond_X( X) # tensor (T-1,K,K) (t,i,j) -> p(z_(t+1) = j, z t = i|X; θ) # Maximization step self.pis = pi[0, :] pi_repeated = pi[:, :, np.newaxis] # (T,K,D) self.mus = np.sum(pi_repeated * X[:, np.newaxis, :], axis=0) / np.sum(pi_repeated, axis=0) self.Sigmas2 = [] for k in range(self.K): Xc = X - self.mus[k] Sigmas2k = 0 for t in range(T): xt = Xc[t, :][:, None] # size (d,1) Sigmas2k += np.dot(xt, xt.T) * pi[t, k] Sigmas2k /= np.sum(pi[:, k]) self.Sigmas2.append(Sigmas2k) self.Sigmas2 = np.array(self.Sigmas2) self.A = np.sum(pij, axis=0) / np.sum(pi[:-1], axis=0)[:, None] # Computing new likelihood, and deciding if we should stop old_lik = lik # storing old_likelihood to compute delta_lik lik = self.compute_log_likelihood(X) # storing new likelihood delta_lik = lik - old_lik # measure to decide if we should stop or iterate again print("Iter " + str(cpt_iter) + " ; log_likelihood : " + str(lik)) cpt_iter += 1 print("EM algorithm converged.") print("initial distribution found (rounded, 2 decimals) : ", np.round(self.pis, 2)) print("transition matrix found (rounded, 2 decimals) : ", np.round(self.A, 2))
def test_data(length, array_length): #txtName = "causal_continue_noise_0.4_normal_sample_1000_length_200.txt" #f = file(txtName, "a+") counter11 = 0 counter10 = 0 counter01 = 0 counter00 = 0 counter11_01 = 0 counter10_01 = 0 counter01_01 = 0 counter00_01 = 0 counter_undecided = 0 counter_true = 0 counter_false = 0 counter_undecided2 = 0 counter_true2 = 0 counter_false2 = 0 counter_error_1 = 0 counter_error_2 = 0 p_array_granger1 = [] p_array_granger2 = [] p_array_CUTE1 = [] p_array_CUTE2 = [] p_array_improve_CUTE1 = [] p_array_improve_CUTE2 = [] p_array1 = [] p_array2 = [] p_array_granger = [] for i in range(0, 1000): write_str = "" p = random.randint(1, 3) #effect, test1 = generate_continue_data(200, p) #cause, effect = generate_continue_data(150, p) #cause_tmp = list(cause) #effect_tmp = list(effect) #cause = zero_change(cause) #effect = zero_change(effect) #cause,effect = generate_continue_data_with_change_lag(350,10) cause = GMM(3, array_length) effect = GMM(5, array_length) cause_tmp = list(cause) effect_tmp = list(effect) #effect = forward_shift_continue_data(cause,p) #noise = np.random.normal(0, 0.1, 200) #for j in range(0, 200): # effect[j] = effect[j] + noise[j] #for i in range(0,len(cause)): #cause[i]=math.tanh(cause[i]) #cause[i] = math.pow(math.e,cause[i]) #effect[i] = math.pow(math.e,effect[i]) #cause[i] = math.pow(cause[i],3)/10 #effect[i] = math.pow(effect[i],3)/10 #effect[i]=math.tanh(effect[i]) #effect[i] = math.pow(effect[i],3) #effect = GMM(8,200) #plt.plot(cause) #plt.plot(effect) #plt.show() #cause = normalize(cause) #effect = normalize(effect) #cause = normalize_data(cause) #effect = normalize_data(effect) #cause = zero_change(cause) #effect = zero_change(effect) from scipy.special import expit #for i in range(0,len(effect)): #effect[i]=expit(effect[i]) #effect[i] = 1.0/effect[i] for ii in range(0, len(cause)): write_str = write_str + " " + str(cause[ii]) for jj in range(0, len(effect)): write_str = write_str + " " + str(effect[jj]) #print "cause:" + str(cause) #print "effect:" + str(effect) # effect, test2 = ge_normal_data(p,200) print "Continuous data, Granger causality test" print "cause->effect" p_value_cause_to_effect1 = [] flag1 = False #ce1 = grangercausalitytests([[effect[i], cause[i]] for i in range(0, len(cause))], p) ce_p = granger(cause, effect, -1) #for key in ce1: # p_value_cause_to_effect1.append(ce1[key][0]["params_ftest"][1]) # if ce1[key][0]["params_ftest"][1] < 0.05: # flag1 = True if ce_p < 0.05: flag1 = True print "effect->cause" p_value_effect_to_cause2 = [] flag2 = False #ce2 = grangercausalitytests([[cause[i], effect[i]] for i in range(0, len(cause))], p) ce2_p = granger(effect, cause, -1) #for key in ce2: # p_value_effect_to_cause2.append(ce2[key][0]["params_ftest"][1]) # if ce2[key][0]["params_ftest"][1] < 0.05: # flag2 = True if ce2_p < 0.05: flag2 = True if ce_p < 0.05: p_array_granger1.append(ce_p) elif ce2_p < 0.05: p_array_granger2.append(ce2_p) if flag1 and flag2: print "Continuous data,Granger two-way cause and effect" write_str = write_str + " " + "连续数据,格兰杰双向因果" counter11 += 1 elif flag1 and not flag2: print "Continuous data,Granger correct cause and effect" write_str = write_str + " " + "连续数据,格兰杰正确因果" counter10 += 1 p_array_granger.append(ce_p) elif not flag1 and flag2: print "Continuous data,Granger wrong cause and effect" write_str = write_str + " " + "连续数据,格兰杰错误因果" counter01 += 1 elif not flag1 and not flag2: print "Continuous data,Granger no cause and effect" write_str = write_str + " " + "连续数据,格兰杰没有因果" #break counter00 += 1 
#write_str = write_str + " " + str(min(p_value_cause_to_effect1)) + " " + str(min(p_value_effect_to_cause2)) cause2 = get_type_array(cause, length) effect2 = get_type_array(effect, length) print "01 data, Granger causality test" print "cause->effect" p_value_cause_to_effect3 = [] flag3 = False #ce3 = grangercausalitytests([[effect2[i], cause2[i]] for i in range(0, len(cause2))], p) ce3_p = granger(cause2, effect2, -1) #for key in ce3: # p_value_cause_to_effect3.append(ce3[key][0]["params_ftest"][1]) # if ce3[key][0]["params_ftest"][1] < 0.05: # flag3 = True if ce3_p < 0.05: flag3 = True print "effect->cause" p_value_effect_to_cause4 = [] flag4 = False #ce4 = grangercausalitytests([[cause2[i], effect2[i]] for i in range(0, len(cause2))], p) ce4_p = granger(effect2, cause2, -1) #for key in ce4: # p_value_effect_to_cause4.append(ce4[key][0]["params_ftest"][1]) # if ce4[key][0]["params_ftest"][1] < 0.05: # flag4 = True if ce4_p < 0.05: flag4 = True if flag3 and flag4: print "01 data,Granger two-way cause and effect" write_str = write_str + " " + "离散数据,格兰杰双向因果" counter11_01 += 1 elif flag3 and not flag4: print "01 data,Granger correct cause and effect" write_str = write_str + " " + "离散数据,格兰杰正确因果" counter10_01 += 1 elif not flag3 and flag4: print "01 data,Granger wrong cause and effect" write_str = write_str + " " + "离散数据,格兰杰错误因果" counter01_01 += 1 elif not flag3 and not flag4: print "01 data,Granger no cause and effect" write_str = write_str + " " + "离散数据,格兰杰没有因果" counter00_01 += 1 #write_str = write_str + " " + str(min(p_value_cause_to_effect3)) + " " + str(min(p_value_effect_to_cause4)) print delta_ce = calculate_difference3(cause, effect, length) delta_ec = calculate_difference3(effect, cause, length) print 'cause' + ' -> ' + 'effect' + ':' + str(delta_ce) print 'effect' + ' -> ' + 'cause' + ':' + str(delta_ec) if delta_ce > delta_ec and delta_ce - delta_ec >= -math.log(0.05, 2): print "CUTE,correct cause and effect" write_str = write_str + " " + "CUTE,正确因果" counter_true += 1 elif delta_ec > delta_ce and delta_ec - delta_ce >= -math.log(0.05, 2): print "CUTE,wrong cause and effect" write_str = write_str + " " + "CUTE,错误因果" counter_false += 1 else: print "CUTE,undecided" write_str = write_str + " " + "CUTE,未决定" counter_undecided += 1 write_str = write_str + " " + str(pow(2, -abs(delta_ce - delta_ec))) p = math.pow(2, -(delta_ce - delta_ec)) if p < 1: p_array1.append(p) else: p_array2.append(math.pow(2, -(delta_ec - delta_ce))) #f.write(write_str) #f.write("\n") cause = change_to_zero_one(cause_tmp) effect = change_to_zero_one(effect_tmp) cause2effect = bernoulli2(effect, length) - cbernoulli2( effect, cause, length) effect2cause = bernoulli2(cause, length) - cbernoulli2( cause, effect, length) # print 'cause' + ' -> ' + 'effect' + ':' + str(cause2effect) # print 'effect' + ' -> ' + 'cause' + ':' + str(effect2cause) p = math.pow(2, -(cause2effect - effect2cause)) if p < 1: p_array_improve_CUTE1.append(p) else: p_array_improve_CUTE2.append( math.pow(2, -(effect2cause - cause2effect))) cause2effect = bernoulli(effect) - cbernoulli(effect, cause) effect2cause = bernoulli(cause) - cbernoulli(cause, effect) if p < 1: p_array_CUTE1.append(p) else: p_array_CUTE2.append(math.pow(2, -(effect2cause - cause2effect))) print print "*****************************cut line*****************************" print #f.close() print "连续数据,格兰杰因果关系检验:" print "双向因果:" + str(counter11) print "正确因果:" + str(counter10) print "错误因果:" + str(counter01) print "没有因果" + str(counter00) print "-----------------" print 
"离散数据,格兰杰因果关系检验:" print "双向因果:" + str(counter11_01) print "正确因果:" + str(counter10_01) print "错误因果:" + str(counter01_01) print "没有因果" + str(counter00_01) print "-----------------" print "discret data,snml causality test:" print "correct cause and effect:" + str(counter_true) print "wrong cause and effect:" + str(counter_false) print "no cause and effect:" + str(counter_undecided) print "-----------------" print "01 data,CUTE causality test:" granger_test = (bh_procedure(p_array_granger1, 0.05) + bh_procedure(p_array_granger2, 0.05)) / 1000.0 ourmodel = (bh_procedure(p_array1, 0.05) + bh_procedure(p_array2, 0.05)) / 1000.0 cute = (bh_procedure(p_array_CUTE1, 0.05) + bh_procedure(p_array_CUTE2, 0.05)) / 1000.0 improve_cute = (bh_procedure(p_array_improve_CUTE1, 0.05) + bh_procedure(p_array_improve_CUTE2, 0.05)) / 1000.0 print granger_test print improve_cute print ourmodel return granger_test, ourmodel, cute, improve_cute